diff -r 675d3f76444d Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c --- a/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c Tue Sep 27 22:05:04 2016 +0300 +++ b/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c Tue Sep 27 23:33:56 2016 +0300 @@ -83,28 +83,34 @@ typedef unsigned int UINT32; lowOut = lowIn ^ temp0; \ highOut = highIn ^ temp1; +typedef union { + UINT8 asBytes[8]; + UINT32 asInts[2]; +} laneUnion; /* one 8-byte lane, viewed as raw bytes or as two UINT32 halves */ + +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) /* SWAB4(value): reverse the four bytes of a UINT32 lvalue in place; no trailing ';' so SWAB4(x); is exactly one statement */ +#define SWAB4(value) do { \ + (value) = (((value) & 0x0000FFFFu) << 16) | ((value) >> 16); \ + (value) = (((value) & 0x00FF00FFu) << 8) | \ + (((value) >> 8) & 0x00FF00FFu); \ + } while (0) +#endif + void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length) { - UINT8 laneAsBytes[8]; + laneUnion lane; UINT32 low, high; UINT32 temp, temp0, temp1; UINT32 *stateAsHalfLanes = (UINT32*)state; - memset(laneAsBytes, 0xFF, offset); - memset(laneAsBytes+offset, 0x00, length); - memset(laneAsBytes+offset+length, 0xFF, 8-offset-length); -#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - low = *((UINT32*)(laneAsBytes+0)); - high = *((UINT32*)(laneAsBytes+4)); -#else - low = laneAsBytes[0] - | ((UINT32)(laneAsBytes[1]) << 8) - | ((UINT32)(laneAsBytes[2]) << 16) - | ((UINT32)(laneAsBytes[3]) << 24); - high = laneAsBytes[4] - | ((UINT32)(laneAsBytes[5]) << 8) - | ((UINT32)(laneAsBytes[6]) << 16) - | ((UINT32)(laneAsBytes[7]) << 24); + memset(lane.asBytes, 0xFF, offset); + memset(lane.asBytes+offset, 0x00, length); + memset(lane.asBytes+offset+length, 0xFF, 8-offset-length); + low = lane.asInts[0]; + high = lane.asInts[1]; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + SWAB4(low); + SWAB4(high); #endif toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); } @@ -141,25 +147,18 @@ void KeccakP1600_AddByte(void *state, un void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned 
char *data, unsigned int offset, unsigned int length) { - UINT8 laneAsBytes[8]; + laneUnion lane; UINT32 low, high; UINT32 temp, temp0, temp1; UINT32 *stateAsHalfLanes = (UINT32*)state; - memset(laneAsBytes, 0, 8); - memcpy(laneAsBytes+offset, data, length); -#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - low = *((UINT32*)(laneAsBytes+0)); - high = *((UINT32*)(laneAsBytes+4)); -#else - low = laneAsBytes[0] - | ((UINT32)(laneAsBytes[1]) << 8) - | ((UINT32)(laneAsBytes[2]) << 16) - | ((UINT32)(laneAsBytes[3]) << 24); - high = laneAsBytes[4] - | ((UINT32)(laneAsBytes[5]) << 8) - | ((UINT32)(laneAsBytes[6]) << 16) - | ((UINT32)(laneAsBytes[7]) << 24); + memset(lane.asBytes, 0, 8); + memcpy(lane.asBytes+offset, data, length); + low = lane.asInts[0]; + high = lane.asInts[1]; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + SWAB4(low); + SWAB4(high); #endif toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); } @@ -289,23 +288,16 @@ void KeccakP1600_ExtractBytesInLane(cons { UINT32 *stateAsHalfLanes = (UINT32*)state; UINT32 low, high, temp, temp0, temp1; - UINT8 laneAsBytes[8]; + laneUnion lane; fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1); -#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - *((UINT32*)(laneAsBytes+0)) = low; - *((UINT32*)(laneAsBytes+4)) = high; -#else - laneAsBytes[0] = low & 0xFF; - laneAsBytes[1] = (low >> 8) & 0xFF; - laneAsBytes[2] = (low >> 16) & 0xFF; - laneAsBytes[3] = (low >> 24) & 0xFF; - laneAsBytes[4] = high & 0xFF; - laneAsBytes[5] = (high >> 8) & 0xFF; - laneAsBytes[6] = (high >> 16) & 0xFF; - laneAsBytes[7] = (high >> 24) & 0xFF; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + SWAB4(low); + SWAB4(high); #endif - memcpy(data, laneAsBytes+offset, length); + lane.asInts[0] = low; + lane.asInts[1] = high; + memcpy(data, lane.asBytes+offset, length); } /* 
---------------------------------------------------------------- */ @@ -361,25 +353,18 @@ void KeccakP1600_ExtractAndAddBytesInLan { UINT32 *stateAsHalfLanes = (UINT32*)state; UINT32 low, high, temp, temp0, temp1; - UINT8 laneAsBytes[8]; + laneUnion lane; unsigned int i; fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1); -#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - *((UINT32*)(laneAsBytes+0)) = low; - *((UINT32*)(laneAsBytes+4)) = high; -#else - laneAsBytes[0] = low & 0xFF; - laneAsBytes[1] = (low >> 8) & 0xFF; - laneAsBytes[2] = (low >> 16) & 0xFF; - laneAsBytes[3] = (low >> 24) & 0xFF; - laneAsBytes[4] = high & 0xFF; - laneAsBytes[5] = (high >> 8) & 0xFF; - laneAsBytes[6] = (high >> 16) & 0xFF; - laneAsBytes[7] = (high >> 24) & 0xFF; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + SWAB4(low); + SWAB4(high); #endif + lane.asInts[0] = low; + lane.asInts[1] = high; for(i=0; i