/* adler32.c -- compute the Adler-32 checksum of a data stream * Copyright (C) 1995-2011, 2016 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* @(#) $Id$ */ #include "zutil.h" #define BASE 65521U /* largest prime smaller than 65536 */ #define NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ #define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} #define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); #define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); #define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); #define DO16(buf) DO8(buf,0); DO8(buf,8); /* use NO_DIVIDE if your processor does not do division in hardware -- try it both ways to see which is faster */ #ifdef NO_DIVIDE /* note that this assumes BASE is 65521, where 65536 % 65521 == 15 (thank you to John Reiser for pointing this out) */ # define CHOP(a) \ do { \ unsigned long tmp = a >> 16; \ a &= 0xffffUL; \ a += (tmp << 4) - tmp; \ } while (0) # define MOD28(a) \ do { \ CHOP(a); \ if (a >= BASE) a -= BASE; \ } while (0) # define MOD(a) \ do { \ CHOP(a); \ MOD28(a); \ } while (0) # define MOD63(a) \ do { /* this assumes a is not negative */ \ z_off64_t tmp = a >> 32; \ a &= 0xffffffffL; \ a += (tmp << 8) - (tmp << 5) + tmp; \ tmp = a >> 16; \ a &= 0xffffL; \ a += (tmp << 4) - tmp; \ tmp = a >> 16; \ a &= 0xffffL; \ a += (tmp << 4) - tmp; \ if (a >= BASE) a -= BASE; \ } while (0) #else # define MOD(a) a %= BASE # define MOD28(a) a %= BASE # define MOD63(a) a %= BASE #endif #include "cpu_features.h" #if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) || defined(ADLER32_SIMD_RVV) #include "adler32_simd.h" #endif /* ========================================================================= */ uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) { unsigned long sum2; unsigned n; /* TODO(cavalcantii): verify if this lengths are optimal for current CPUs. */ #if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) \ || defined(ADLER32_SIMD_RVV) #if defined(ADLER32_SIMD_SSSE3) if (buf != Z_NULL && len >= 64 && x86_cpu_enable_ssse3) #elif defined(ADLER32_SIMD_NEON) if (buf != Z_NULL && len >= 64) #elif defined(ADLER32_SIMD_RVV) if (buf != Z_NULL && len >= 32 && riscv_cpu_enable_rvv) #endif return adler32_simd_(adler, buf, len); #endif /* split Adler-32 into component sums */ sum2 = (adler >> 16) & 0xffff; adler &= 0xffff; /* in case user likes doing a byte at a time, keep it fast */ if (len == 1) { adler += buf[0]; if (adler >= BASE) adler -= BASE; sum2 += adler; if (sum2 >= BASE) sum2 -= BASE; return adler | (sum2 << 16); } #if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) \ || defined(RISCV_RVV) /* * Use SIMD to compute the adler32. Since this function can be * freely used, check CPU features here. zlib convention is to * call adler32(0, NULL, 0), before making calls to adler32(). * So this is a good early (and infrequent) place to cache CPU * features for those later, more interesting adler32() calls. */ if (buf == Z_NULL) { if (!len) /* Assume user is calling adler32(0, NULL, 0); */ cpu_check_features(); return 1L; } #else /* initial Adler-32 value (deferred check for len == 1 speed) */ if (buf == Z_NULL) return 1L; #endif /* in case short lengths are provided, keep it somewhat fast */ if (len < 16) { while (len--) { adler += *buf++; sum2 += adler; } if (adler >= BASE) adler -= BASE; MOD28(sum2); /* only added so many BASE's */ return adler | (sum2 << 16); } /* do length NMAX blocks -- requires just one modulo operation */ while (len >= NMAX) { len -= NMAX; n = NMAX / 16; /* NMAX is divisible by 16 */ do { DO16(buf); /* 16 sums unrolled */ buf += 16; } while (--n); MOD(adler); MOD(sum2); } /* do remaining bytes (less than NMAX, still just one modulo) */ if (len) { /* avoid modulos if none remaining */ while (len >= 16) { len -= 16; DO16(buf); buf += 16; } while (len--) { adler += *buf++; sum2 += adler; } MOD(adler); MOD(sum2); } /* return recombined sums */ return adler | (sum2 << 16); } /* ========================================================================= */ uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) { return adler32_z(adler, buf, len); } /* ========================================================================= */ local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) { unsigned long sum1; unsigned long sum2; unsigned rem; /* for negative len, return invalid adler32 as a clue for debugging */ if (len2 < 0) return 0xffffffffUL; /* the derivation of this formula is left as an exercise for the reader */ MOD63(len2); /* assumes len2 >= 0 */ rem = (unsigned)len2; sum1 = adler1 & 0xffff; sum2 = rem * sum1; MOD(sum2); sum1 += (adler2 & 0xffff) + BASE - 1; sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; if (sum1 >= BASE) sum1 -= BASE; if (sum1 >= BASE) sum1 -= BASE; if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); if (sum2 >= BASE) sum2 -= BASE; return sum1 | (sum2 << 16); } /* ========================================================================= */ uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) { return adler32_combine_(adler1, adler2, len2); } uLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) { return adler32_combine_(adler1, adler2, len2); }