https://bugs.gentoo.org/931556
https://gcc.gnu.org/PR117800
https://github.com/technion/libscrypt/issues/60
https://github.com/technion/libscrypt/pull/62

From 1c20db581099de69d3f8ce8e06981e7c7e14f2e4 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Wed, 27 Nov 2024 03:07:57 +0000
Subject: [PATCH 1/3] sysendian.h: fix aliasing violations

Accessing a e.g. uint32_t as uint8_t is an aliasing violation. The
helpers here are nearly idiomatic for avoiding that anyway, so fix
them up accordingly.
--- a/sysendian.h
+++ b/sysendian.h
@@ -49,95 +49,77 @@
 #endif
 
 static INLINE uint32_t
-be32dec(const void *pp)
+be32dec(const unsigned char* p)
 {
-	const uint8_t *p = (uint8_t const *)pp;
-
-	return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
-	    ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
+	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | ((uint32_t)p[2] << 8) |
+		((uint32_t)p[3] << 0);
 }
 
 static INLINE void
-be32enc(void *pp, uint32_t x)
+be32enc(unsigned char* p, const uint32_t x)
 {
-	uint8_t * p = (uint8_t *)pp;
-
-	p[3] = x & 0xff;
-	p[2] = (x >> 8) & 0xff;
-	p[1] = (x >> 16) & 0xff;
-	p[0] = (x >> 24) & 0xff;
+	p[0] = (x >> 24);
+	p[1] = (x >> 16);
+	p[2] = (x >> 8);
+	p[3] = (x >> 0);
 }
 
 static INLINE uint64_t
-be64dec(const void *pp)
+be64dec(const unsigned char* p)
 {
-	const uint8_t *p = (uint8_t const *)pp;
-
-	return ((uint64_t)(p[7]) + ((uint64_t)(p[6]) << 8) +
-	    ((uint64_t)(p[5]) << 16) + ((uint64_t)(p[4]) << 24) +
-	    ((uint64_t)(p[3]) << 32) + ((uint64_t)(p[2]) << 40) +
-	    ((uint64_t)(p[1]) << 48) + ((uint64_t)(p[0]) << 56));
+	return ((uint64_t)p[0] << 0) | ((uint64_t)p[1] << 8) | ((uint64_t)p[2] << 16) |
+		((uint64_t)p[3] << 24) | ((uint64_t)p[4] << 32) | ((uint64_t)p[5] << 40) |
+		((uint64_t)p[6] << 48) | ((uint64_t)p[7] << 56);
 }
 
 static INLINE void
-be64enc(void *pp, uint64_t x)
+be64enc(unsigned char* p, const uint64_t x)
 {
-	uint8_t * p = (uint8_t *)pp;
-
-	p[7] = x & 0xff;
-	p[6] = (x >> 8) & 0xff;
-	p[5] = (x >> 16) & 0xff;
-	p[4] = (x >> 24) & 0xff;
-	p[3] = (x >> 32) & 0xff;
-	p[2] = (x >> 40) & 0xff;
-	p[1] = (x >> 48) & 0xff;
-	p[0] = (x >> 56) & 0xff;
+	p[0] = (x >> 56);
+	p[1] = (x >> 48);
+	p[2] = (x >> 40);
+	p[3] = (x >> 32);
+	p[4] = (x >> 24);
+	p[5] = (x >> 16);
+	p[6] = (x >> 8);
+	p[7] = (x >> 0);
 }
 
 static INLINE uint32_t
-le32dec(const void *pp)
+le32dec(const unsigned char* p)
 {
-	const uint8_t *p = (uint8_t const *)pp;
-
-	return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
-	    ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
+	return ((uint32_t)p[0] << 0) | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
+		((uint32_t)p[3] << 24);
 }
 
 static INLINE void
-le32enc(void *pp, uint32_t x)
+le32enc(unsigned char* p, const uint32_t x)
 {
-	uint8_t * p = (uint8_t *)pp;
-
-	p[0] = x & 0xff;
-	p[1] = (x >> 8) & 0xff;
-	p[2] = (x >> 16) & 0xff;
-	p[3] = (x >> 24) & 0xff;
+	p[0] = (x >> 0);
+	p[1] = (x >> 8);
+	p[2] = (x >> 16);
+	p[3] = (x >> 24);
 }
 
 static INLINE uint64_t
-le64dec(const void *pp)
+le64dec(const unsigned char* p)
 {
-	const uint8_t *p = (uint8_t const *)pp;
-
-	return ((uint64_t)(p[0]) + ((uint64_t)(p[1]) << 8) +
-	    ((uint64_t)(p[2]) << 16) + ((uint64_t)(p[3]) << 24) +
-	    ((uint64_t)(p[4]) << 32) + ((uint64_t)(p[5]) << 40) +
-	    ((uint64_t)(p[6]) << 48) + ((uint64_t)(p[7]) << 56));
+	return ((uint64_t)p[0] << 0) | ((uint64_t)p[1] << 8) | ((uint64_t)p[2] << 16) |
+		((uint64_t)p[3] << 24) | ((uint64_t)p[4] << 32) | ((uint64_t)p[5] << 40) |
+		((uint64_t)p[6] << 48) | ((uint64_t)p[7] << 56);
 }
 
 static INLINE void
-le64enc(void *pp, uint64_t x)
+le64enc(unsigned char* p, const uint64_t x)
 {
-	uint8_t * p = (uint8_t *)pp;
-
-	p[0] = x & 0xff;
-	p[1] = (x >> 8) & 0xff;
-	p[2] = (x >> 16) & 0xff;
-	p[3] = (x >> 24) & 0xff;
-	p[4] = (x >> 32) & 0xff;
-	p[5] = (x >> 40) & 0xff;
-	p[6] = (x >> 48) & 0xff;
-	p[7] = (x >> 56) & 0xff;
+	p[0] = (x >> 0);
+	p[1] = (x >> 8);
+	p[2] = (x >> 16);
+	p[3] = (x >> 24);
+	p[4] = (x >> 32);
+	p[5] = (x >> 40);
+	p[6] = (x >> 48);
+	p[7] = (x >> 56);
 }
 #endif /* !HAVE_SYS_ENDIAN_H */
 

From da48d4408fb5364ab20ef98696bb5b0f0388f6f9 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Wed, 27 Nov 2024 03:09:46 +0000
Subject: [PATCH 2/3] b64: fix -Wold-style-definition

Drop K&R decls. Most of the codebase uses ISO decls anyway.
--- a/b64.c
+++ b/b64.c
@@ -120,11 +120,7 @@ static const char Pad64 = '=';
 */
 
 int
-libscrypt_b64_encode(src, srclength, target, targsize)
-	unsigned char const *src;
-	size_t srclength;
-	char *target;
-	size_t targsize;
+libscrypt_b64_encode(unsigned char const *src, size_t srclength, char *target, size_t targsize)
 {
 	size_t datalength = 0;
 	unsigned char input[3];
@@ -184,10 +180,7 @@ libscrypt_b64_encode(src, srclength, target, targsize)
  */
 
 int
-libscrypt_b64_decode(src, target, targsize)
-	char const *src;
-	unsigned char *target;
-	size_t targsize;
+libscrypt_b64_decode(char const *src, unsigned char *target, size_t targsize)
 {
 	int state, ch;
     unsigned int tarindex;

From 7b574b9c517a3d1f9bd0e265a5f287155293cb85 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Wed, 27 Nov 2024 03:48:29 +0000
Subject: [PATCH 3/3] crypto_scrypt-nosse: fix aliasing violations

blkcpy was accessing memory as size_t* where it started as uint32_t*
which is an aliasing violation. But just replace it with memcpy rather
than reinventing the wheel.

It turns out this got fixed in scrypt a while ago, so let's do the same
fix as they did: https://github.com/Tarsnap/scrypt/commit/209fd279c9357010d1dabd446c458dfeb9820e6c.

Closes: https://github.com/technion/libscrypt/issues/60
--- a/crypto_scrypt-nosse.c
+++ b/crypto_scrypt-nosse.c
@@ -41,35 +41,19 @@
 
 #include "libscrypt.h"
 
-static void blkcpy(void *, void *, size_t);
-static void blkxor(void *, void *, size_t);
+static void blkxor(uint32_t * dest, uint32_t * src, size_t len);
 static void salsa20_8(uint32_t[16]);
 static void blockmix_salsa8(uint32_t *, uint32_t *, uint32_t *, size_t);
-static uint64_t integerify(void *, size_t);
+static uint64_t integerify(uint32_t * B, size_t r);
 static void smix(uint8_t *, size_t, uint64_t, uint32_t *, uint32_t *);
 
 static void
-blkcpy(void * dest, void * src, size_t len)
+blkxor(uint32_t * dest, uint32_t * src, size_t len)
 {
-	size_t * D = dest;
-	size_t * S = src;
-	size_t L = len / sizeof(size_t);
 	size_t i;
 
-	for (i = 0; i < L; i++)
-		D[i] = S[i];
-}
-
-static void
-blkxor(void * dest, void * src, size_t len)
-{
-	size_t * D = dest;
-	size_t * S = src;
-	size_t L = len / sizeof(size_t);
-	size_t i;
-
-	for (i = 0; i < L; i++)
-		D[i] ^= S[i];
+	for (i = 0; i < len / 4; i++)
+		dest[i] ^= src[i];
 }
 
 /**
@@ -82,7 +66,7 @@ salsa20_8(uint32_t B[16])
 	uint32_t x[16];
 	size_t i;
 
-	blkcpy(x, B, 64);
+	memcpy(x, B, 64);
 	for (i = 0; i < 8; i += 2) {
 #define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
 		/* Operate on columns. */
@@ -128,7 +112,7 @@ blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r)
 	size_t i;
 
 	/* 1: X <-- B_{2r - 1} */
-	blkcpy(X, &Bin[(2 * r - 1) * 16], 64);
+	memcpy(X, &Bin[(2 * r - 1) * 16], 64);
 
 	/* 2: for i = 0 to 2r - 1 do */
 	for (i = 0; i < 2 * r; i += 2) {
@@ -138,7 +122,7 @@ blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r)
 
 		/* 4: Y_i <-- X */
 		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
-		blkcpy(&Bout[i * 8], X, 64);
+		memcpy(&Bout[i * 8], X, 64);
 
 		/* 3: X <-- H(X \xor B_i) */
 		blkxor(X, &Bin[i * 16 + 16], 64);
@@ -146,7 +130,7 @@ blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r)
 
 		/* 4: Y_i <-- X */
 		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
-		blkcpy(&Bout[i * 8 + r * 16], X, 64);
+		memcpy(&Bout[i * 8 + r * 16], X, 64);
 	}
 }
 
@@ -155,10 +139,9 @@ blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r)
  * Return the result of parsing B_{2r-1} as a little-endian integer.
  */
 static uint64_t
-integerify(void * B, size_t r)
+integerify(uint32_t * B, size_t r)
 {
-	uint32_t * X = (void *)((uintptr_t)(B) + (2 * r - 1) * 64);
-
+	const uint32_t * X = B + (2 * r - 1) * 16;
 	return (((uint64_t)(X[1]) << 32) + X[0]);
 }
 
@@ -187,13 +170,13 @@ smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY)
 	/* 2: for i = 0 to N - 1 do */
 	for (i = 0; i < N; i += 2) {
 		/* 3: V_i <-- X */
-		blkcpy(&V[i * (32 * r)], X, 128 * r);
+		memcpy(&V[i * (32 * r)], X, 128 * r);
 
 		/* 4: X <-- H(X) */
 		blockmix_salsa8(X, Y, Z, r);
 
 		/* 3: V_i <-- X */
-		blkcpy(&V[(i + 1) * (32 * r)], Y, 128 * r);
+		memcpy(&V[(i + 1) * (32 * r)], Y, 128 * r);
 
 		/* 4: X <-- H(X) */
 		blockmix_salsa8(Y, X, Z, r);