lilcrypto/impl_poly1305.c

/*
 * Copyright (c) 2024 Lucas Gabriel Vuotto <lucas@lgv5.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "internal.h"
#include "util.h"


/*
 * Poly1305 implementation.
 *
 * Poly1305 originally designed by Daniel J. Bernstein, "The Poly1305-AES
 * message-authentication code", https://cr.yp.to/mac/poly1305-20050329.pdf .
 *
 * This implementation is written from scratch, but consulting poly1305-donna
 * by Andrew Moon, https://github.com/floodyberry/poly1305-donna, released
 * under MIT license. Similarities are to be expected.
 */

/*
 * Copyright 2011-2016 Andrew Moon <liquidsun@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * To ease reduction modulo p = 2^130 - 5, work in base 2^130, as 2^130 = 5 mod
 * p, allowing for easier operations. 2^130 splits evenly into 5 limbs of 26
 * bits.
 *
 * Addition is performed limb-wise:
 *
 * h   =    h4    h3    h2    h1    h0
 * c   =    c4    c3    c2    c1    c0
 * -----------------------------------
 * h+c = h4+c4 h3+c3 h2+c2 h1+c1 h0+c0
 *
 * Carry won't be propagated at this step.
 *
 * Considering h = h + c, multiplication is performed as school multiplication
 * / long multiplication:
 *
 * h   =                            h4    h3    h2    h1    h0
 * r   =                            r4    r3    r2    r1    r0
 * -----------------------------------------------------------
 *                               h4*r0 h3*r0 h2*r0 h1*r0 h0*r0
 *                         h4*r1 h3*r1 h2*r1 h1*r1 h0*r1
 *                   h4*r2 h3*r2 h2*r2 h1*r2 h0*r2
 *             h4*r3 h3*r3 h2*r3 h1*r3 h0*r3
 *       h4*r4 h3*r4 h2*r4 h1*r4 h0*r4
 *
 * Each hn*rn fits in 53 bits. Carry won't be propagated at this step. Partial
 * reduction modulo p starts here:
 *
 *                             2^130
 * h   =                         |    h4    h3    h2    h1    h0
 * r   =                         |    r4    r3    r2    r1    r0
 * ------------------------------|------------------------------
 *                               | h4*r0 h3*r0 h2*r0 h1*r0 h0*r0
 *                         h4*r1 | h3*r1 h2*r1 h1*r1 h0*r1
 *                   h4*r2 h3*r2 | h2*r2 h1*r2 h0*r2
 *             h4*r3 h3*r3 h2*r3 | h1*r3 h0*r3
 *       h4*r4 h3*r4 h2*r4 h1*r4 | h0*r4
 *
 *       2^130
 * h   =   |    h4      h3      h2      h1      h0
 * r   =   |    r4      r3      r2      r1      r0
 * --------|--------------------------------------
 *         | h4*r0   h3*r0   h2*r0   h1*r0   h0*r0
 *         | h3*r1   h2*r1   h1*r1   h0*r1 5*h4*r1
 *         | h2*r2   h1*r2   h0*r2 5*h4*r2 5*h3*r2
 *         | h1*r3   h0*r3 5*h4*r3 5*h3*r3 5*h2*r3
 *         | h0*r4 5*h4*r4 5*h3*r4 5*h2*r4 5*h1*r4
 * --------|--------------------------------------
 * h*r =   |    t4      t3      t2      t1      t0
 *
 * All the carry propagations are performed after this step. h0 is set to the
 * low 26 bits of t0; h1 thru h4 are set to tn + (tn-1 >> 26) to propagate the
 * carry. t4 might overflow so it needs to be backpropagated to h0 and h1. h1
 * won't carry into h2: given the highest possible h, c, and r,
 *
 * h =  0xffffffffffffffffffffffffffffffff
 * c = 0x1ffffffffffffffffffffffffffffffff
 * r =  0x0ffffffc0ffffffc0ffffffc0fffffff
 *
 * the limbs and t4 before h0 and h1 second propagation are
 *
 * h4 = 0x257ffff
 * h3 = 0x3a95fff
 * h2 = 0x3fea57f
 * h1 = 0x3fffa70
 * h0 = 0x2000002
 * t4 = 0x77fffffa57ffff
 *
 * which becomes
 *
 * h4 = 0x257ffff
 * h3 = 0x3a95fff
 * h2 = 0x3fea57f
 * h1 = 0x3fffa95
 * h0 = 0x3fffff8
 *
 * To perform the final reduction modulo p, observe that each hn is bound by
 * 2^26, which means that h is bound by 2^130. Define minusp = 2^130 - p = 5.
 * - If h < p, minusp + h < 2^130.
 * - If h >= p, then h = p + k with k in {0,1,2,3,4}, and minusp + h =
 *   2^130 - p + p + k = 2^130 + k >= 2^130, and both minusp + h = k mod 2^130
 *   and h = k mod p for all possible values of k.
 *
 * To avoid information leaking via side channels, define g = minusp + h, and
 * select g if bit 130 is set, h otherwise. In particular, define a 32-bit
 * mask = ~(g >> 130) + 1.
 * - If bit 130 of g is 1, mask = ~1 + 1 = 0xffffffff.
 * - If bit 130 of g is 0, mask = ~0 + 1 = 0.
 * Then perform (h & ~mask) | (g & mask).
 */

/*
 * Absorb the 16-byte block held in state->b into the accumulator:
 * h = (h + block + hibit * 2^128) * r, partially reduced modulo 2^130 - 5.
 *
 * state: supplies the accumulator limbs h0..h4 (updated in place), the
 *        multiplier limbs r0..r4, the x1..x4 factors and the block bytes b.
 * hibit: value placed at bit 24 of the top limb, i.e. bit 128 of the block.
 *        NOTE(review): presumably 1 for full blocks and 0 for an already
 *        padded final block -- confirm against the callers.
 *
 * The function is straight-line code: no branch or memory index depends on
 * the data being processed.
 */
void
poly1305_block(struct poly1305_state *state, uint32_t hibit)
{
	const uint64_t limb = 0x3ffffff;	/* low 26 bits */
	uint64_t a0, a1, a2, a3, a4;		/* accumulator limbs */
	uint64_t m0, m1, m2, m3, top;		/* block words and high bit */
	uint64_t p0, p1, p2, p3, p4;		/* product columns */
	uint32_t r0, r1, r2, r3, r4, x1, x2, x3, x4;

	r0 = state->r0;
	r1 = state->r1;
	r2 = state->r2;
	r3 = state->r3;
	r4 = state->r4;
	/*
	 * x1..x4 take the place of the 5*rn factors in the reduction table of
	 * the file header comment; they are only read here, never computed.
	 */
	x1 = state->x1;
	x2 = state->x2;
	x3 = state->x3;
	x4 = state->x4;

	a0 = state->h0;
	a1 = state->h1;
	a2 = state->h2;
	a3 = state->h3;
	a4 = state->h4;

	/* Read the block as four little-endian 32-bit words. */
	m0 = load32le(&state->b[0]);
	m1 = load32le(&state->b[4]);
	m2 = load32le(&state->b[8]);
	m3 = load32le(&state->b[12]);
	top = hibit;

	/*
	 * Re-slice the 4 x 32-bit words into 5 x 26-bit limbs and add them to
	 * the accumulator limb-wise. No carry is propagated here. The top limb
	 * is not masked: it holds the remaining 24 block bits plus hibit.
	 */
	a0 += m0 & limb;
	a1 += ((m1 << 6) | (m0 >> 26)) & limb;
	a2 += ((m2 << 12) | (m1 >> 20)) & limb;
	a3 += ((m3 << 18) | (m2 >> 14)) & limb;
	a4 += (top << 24) | (m3 >> 8);

	/*
	 * Schoolbook multiplication by r, one line per result column. Terms
	 * that would land at or above 2^130 are folded back through xn.
	 */
	p0 = a0 * r0 + a1 * x4 + a2 * x3 + a3 * x2 + a4 * x1;
	p1 = a0 * r1 + a1 * r0 + a2 * x4 + a3 * x3 + a4 * x2;
	p2 = a0 * r2 + a1 * r1 + a2 * r0 + a3 * x4 + a4 * x3;
	p3 = a0 * r3 + a1 * r2 + a2 * r1 + a3 * r0 + a4 * x4;
	p4 = a0 * r4 + a1 * r3 + a2 * r2 + a3 * r1 + a4 * r0;

	/* Ripple the carries upwards, column by column. */
	p1 += p0 >> 26;
	p2 += p1 >> 26;
	p3 += p2 >> 26;
	p4 += p3 >> 26;

	/* Keep 26 bits per limb. */
	a0 = p0 & limb;
	a1 = p1 & limb;
	a2 = p2 & limb;
	a3 = p3 & limb;
	a4 = p4 & limb;

	/*
	 * Whatever overflowed the top column sits at 2^130 and wraps around
	 * into limb 0 multiplied by 5; one more carry step into limb 1
	 * follows, and limb 0 is masked again.
	 */
	a0 += 5 * (p4 >> 26);
	a1 += a0 >> 26;
	a0 &= limb;

	state->h0 = a0;
	state->h1 = a1;
	state->h2 = a2;
	state->h3 = a3;
	state->h4 = a4;
}

/*
 * Finish the computation: fully reduce the accumulator modulo p = 2^130 - 5,
 * add the state's s0..s3 words and write the low 128 bits of the sum to a.
 *
 * state: supplies the accumulator limbs h0..h4 and the words s0..s3; it is
 *        only read.
 * a:     receives the result as four 32-bit words, a[0] being the least
 *        significant one.
 *
 * The choice between h and h - p is made with a bit mask rather than a
 * branch, so the control flow does not depend on the accumulator value.
 */
void
poly1305_reduce(struct poly1305_state *state,
    uint32_t a[POLY1305_TAGLEN_WORDS])
{
	uint64_t w0, w1, w2, w3, w4, g0, g1, g2, g3, g4;
	uint32_t take_g, take_h;

	/*
	 * Repack the five 26-bit limbs into four 32-bit words; w4 holds the
	 * bits from position 128 upwards.
	 */
	w0 = (state->h0 | (state->h1 << 26)) & 0xffffffff;
	w1 = ((state->h1 >> 6) | (state->h2 << 20)) & 0xffffffff;
	w2 = ((state->h2 >> 12) | (state->h3 << 14)) & 0xffffffff;
	w3 = ((state->h3 >> 18) | (state->h4 << 8)) & 0xffffffff;
	w4 = state->h4 >> 24;

	/*
	 * g = h + 5. The carries out of each word are left in bits 32 and up
	 * of g0..g3; they are discarded by the 32-bit wide mask below.
	 */
	g0 = w0 + 5;
	g1 = w1 + (g0 >> 32);
	g2 = w2 + (g1 >> 32);
	g3 = w3 + (g2 >> 32);
	g4 = w4 + (g3 >> 32);

	/*
	 * Bit 2 of g4 is bit 130 of g. It is set exactly when h + 5 reached
	 * 2^130, in which case the low 130 bits of g are h - p.
	 * take_g is all ones if that bit is set and zero otherwise.
	 */
	take_g = ~(g4 >> 2) + 1;
	take_h = ~take_g;

	w0 = (w0 & take_h) | (g0 & take_g);
	w1 = (w1 & take_h) | (g1 & take_g);
	w2 = (w2 & take_h) | (g2 & take_g);
	w3 = (w3 & take_h) | (g3 & take_g);

	/* Add s word by word; the carry out of the last word is dropped. */
	w0 += state->s0;
	w1 += state->s1 + (w0 >> 32);
	w2 += state->s2 + (w1 >> 32);
	w3 += state->s3 + (w2 >> 32);

	a[0] = w0 & 0xffffffff;
	a[1] = w1 & 0xffffffff;
	a[2] = w2 & 0xffffffff;
	a[3] = w3 & 0xffffffff;
}
initial import 2024-05-31 12:59:58 +02:00			`/*`
			`* Copyright (c) 2024 Lucas Gabriel Vuotto <lucas@lgv5.net>`
			`*`
			`* Permission to use, copy, modify, and distribute this software for any`
			`* purpose with or without fee is hereby granted, provided that the above`
			`* copyright notice and this permission notice appear in all copies.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES`
			`* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF`
			`* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR`
			`* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES`
			`* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN`
			`* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF`
			`* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.`
			`*/`

Merge most of internal headers into internal.h Fix the includes accordingly and get rid of some unused ones. 2024-06-17 23:52:07 +02:00			`#include "internal.h"`
initial import 2024-05-31 12:59:58 +02:00			`#include "util.h"`


			`/*`
			`* Poly1305 implementation.`
			`*`
			`* Poly1305 originally designed by Daniel J. Bernstein, "The Poly1305-AES`
			`* message-authentication code", https://cr.yp.to/mac/poly1305-20050329.pdf .`
			`*`
			`* This implementation is written from scratch, but consulting poly1305-donna`
			`* by Andrew Moon, https://github.com/floodyberry/poly1305-donna, released`
			`* under MIT license. Similarities are to be expected.`
			`*/`

			`/*`
			`* Copyright 2011-2016 Andrew Moon <liquidsun@gmail.com>`
			`*`
			`* Permission is hereby granted, free of charge, to any person obtaining a copy`
			`* of this software and associated documentation files (the "Software"), to`
			`* deal in the Software without restriction, including without limitation the`
			`* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or`
			`* sell copies of the Software, and to permit persons to whom the Software is`
			`* furnished to do so, subject to the following conditions:`
			`*`
			`* The above copyright notice and this permission notice shall be included in`
			`* all copies or substantial portions of the Software.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING`
			`* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS`
			`* IN THE SOFTWARE.`
			`*/`

			`/*`
			`* To ease reduction modulo p = 2^130 - 5, work in base 2^130, as 2^130 = 5 mod`
			`* p, allowing for easier operations. 2^130 splits evenly into 5 limbs of 26`
			`* bits.`
			`*`
			`* Addition is performed limb-wise:`
			`*`
			`* h = h4 h3 h2 h1 h0`
			`* c = c4 c3 c2 c1 c0`
			`* -----------------------------------`
			`* h+c = h4+c4 h3+c3 h2+c2 h1+c1 h0+c0`
			`*`
			`* Carry won't be propagated at this step.`
			`*`
			`* Considering h = h + c, multiplication is performed as school multiplication`
			`* / long multiplication:`
			`*`
			`* h = h4 h3 h2 h1 h0`
			`* r = r4 r3 r2 r1 r0`
			`* -----------------------------------------------------------`
			`* h4r0 h3r0 h2r0 h1r0 h0*r0`
			`* h4r1 h3r1 h2r1 h1r1 h0*r1`
			`* h4r2 h3r2 h2r2 h1r2 h0*r2`
			`* h4r3 h3r3 h2r3 h1r3 h0*r3`
			`* h4r4 h3r4 h2r4 h1r4 h0*r4`
			`*`
			`* Each hn*rn fits in 53 bits. Carry won't be propagated at this step. Partial`
			`* reduction modulo p starts here:`
			`*`
			`* 2^130`
			`* h = \| h4 h3 h2 h1 h0`
			`* r = \| r4 r3 r2 r1 r0`
			`* ------------------------------\|------------------------------`
			`* \| h4r0 h3r0 h2r0 h1r0 h0*r0`
			`* h4r1 \| h3r1 h2r1 h1r1 h0*r1`
			`* h4r2 h3r2 \| h2r2 h1r2 h0*r2`
			`* h4r3 h3r3 h2r3 \| h1r3 h0*r3`
			`* h4r4 h3r4 h2r4 h1r4 \| h0*r4`
			`*`
			`* 2^130`
			`* h = \| h4 h3 h2 h1 h0`
			`* r = \| r4 r3 r2 r1 r0`
			`* --------\|--------------------------------------`
			`* \| h4r0 h3r0 h2r0 h1r0 h0*r0`
			`* \| h3r1 h2r1 h1r1 h0r1 5h4r1`
			`* \| h2r2 h1r2 h0r2 5h4r2 5h3*r2`
			`* \| h1r3 h0r3 5h4r3 5h3r3 5h2r3`
			`* \| h0r4 5h4r4 5h3r4 5h2r4 5h1*r4`
			`* --------\|--------------------------------------`
			`* h*r = \| t4 t3 t2 t1 t0`
			`*`
			`* All the carry propagations are performed after this step. h0 is set t0 low`
			`* 26 bits of t0; h1 thru h4 are set to tn + (tn-1 >> 26) to propagate the`
			`* carry. t4 might overflow so it needs to be backpropagated to h0 and h1. h1`
			`* won't carry into h2: given the highest possible h, c, and r,`
			`*`
			`* h = 0xffffffffffffffffffffffffffffffff`
			`* c = 0x1ffffffffffffffffffffffffffffffff`
			`* r = 0x0ffffffc0ffffffc0ffffffc0fffffff`
			`*`
			`* the limbs and t4 before h0 and h1 second propagation are`
			`*`
			`* h4 = 0x257ffff`
			`* h3 = 0x3a95fff`
			`* h2 = 0x3fea57f`
			`* h1 = 0x3fffa70`
			`* h0 = 0x2000002`
			`* t4 = 0x77fffffa57ffff`
			`*`
Zap dangling whitespaces 2024-06-10 19:44:20 +02:00			`* which becomes`
initial import 2024-05-31 12:59:58 +02:00			`*`
			`* h4 = 0x257ffff`
			`* h3 = 0x3a95fff`
			`* h2 = 0x3fea57f`
			`* h1 = 0x3fffa95`
			`* h0 = 0x3fffff8`
			`*`
			`* To perform the final reduction modulo p, observe that each hn is bound by`
impl/poly1305: it's enough to overflow 2^130 Going for 2^136 doesn't give anything extra, and is one additional addition. 2024-06-30 16:05:28 +02:00			`* 2^26, which means that h is bound by 2^130. Define minusp = 2^130 - p = 5.`
initial import 2024-05-31 12:59:58 +02:00			`* - If h < p, minusp + h < 2^136.`
			`* - If h >= p, then h = p + k with k in {0,1,2,3,4}, and minusp + h =`
impl/poly1305: it's enough to overflow 2^130 Going for 2^136 doesn't give anything extra, and is one additional addition. 2024-06-30 16:05:28 +02:00			`* 2^130 - p + p + k = 2^130 + k >= 2^130, and both minusp + h = k mod 2^130`
initial import 2024-05-31 12:59:58 +02:00			`* and h = k mod p for all possible values of k.`
			`*`
			`* To avoid information leaking via side channels, define g = minusp + h, and`
impl/poly1305: it's enough to overflow 2^130 Going for 2^136 doesn't give anything extra, and is one additional addition. 2024-06-30 16:05:28 +02:00			`* select g if bit 130 is set, h otherwise. In particular, define a 32-bit`
			`* mask = ~(g >> 130) + 1.`
			`* - If bit 130 of g is 1, mask = ~1 + 1 = 0xffffffff.`
			`* - If bit 130 of g is 0, mask = ~0 + 1 = 0.`
initial import 2024-05-31 12:59:58 +02:00			`* Then perform (h & ~mask) \| (g & mask).`
			`*/`

			`void`
Rename all algorithm-specific ctx to state 2024-06-15 23:13:31 +02:00			`poly1305_block(struct poly1305_state *state, uint32_t hibit)`
initial import 2024-05-31 12:59:58 +02:00			`{`
			`uint64_t h0, h1, h2, h3, h4, t0, t1, t2, t3, t4;`
			`uint32_t r0, r1, r2, r3, r4, x1, x2, x3, x4;`

Rename all algorithm-specific ctx to state 2024-06-15 23:13:31 +02:00			`h0 = state->h0;`
			`h1 = state->h1;`
			`h2 = state->h2;`
			`h3 = state->h3;`
			`h4 = state->h4;`
			`r0 = state->r0;`
			`r1 = state->r1;`
			`r2 = state->r2;`
			`r3 = state->r3;`
			`r4 = state->r4;`
			`x1 = state->x1;`
			`x2 = state->x2;`
			`x3 = state->x3;`
			`x4 = state->x4;`

Rename the state block buffers from m{,len} to b{,len} 2024-06-15 23:22:12 +02:00			`t0 = load32le(&state->b[0]);`
			`t1 = load32le(&state->b[4]);`
			`t2 = load32le(&state->b[8]);`
			`t3 = load32le(&state->b[12]);`
initial import 2024-05-31 12:59:58 +02:00			`t4 = hibit;`

			`h0 += t0 & 0x3ffffff;`
			`h1 += ((t1 << 6) \| (t0 >> 26)) & 0x3ffffff;`
			`h2 += ((t2 << 12) \| (t1 >> 20)) & 0x3ffffff;`
			`h3 += ((t3 << 18) \| (t2 >> 14)) & 0x3ffffff;`
			`h4 += (t4 << 24) \| (t3 >> 8);`

			`t0 = h0 * r0 + h4 * x1 + h3 * x2 + h2 * x3 + h1 * x4;`
			`t1 = h1 * r0 + h0 * r1 + h4 * x2 + h3 * x3 + h2 * x4;`
			`t2 = h2 * r0 + h1 * r1 + h0 * r2 + h4 * x3 + h3 * x4;`
			`t3 = h3 * r0 + h2 * r1 + h1 * r2 + h0 * r3 + h4 * x4;`
			`t4 = h4 * r0 + h3 * r1 + h2 * r2 + h1 * r3 + h0 * r4;`

			`h0 = t0 & 0x3ffffff;`
			`t1 += t0 >> 26;`
			`h1 = t1 & 0x3ffffff;`
			`t2 += t1 >> 26;`
			`h2 = t2 & 0x3ffffff;`
			`t3 += t2 >> 26;`
			`h3 = t3 & 0x3ffffff;`
			`t4 += t3 >> 26;`
			`h4 = t4 & 0x3ffffff;`

			`h0 += 5 * (t4 >> 26);`
			`h1 += h0 >> 26;`
			`h0 &= 0x3ffffff;`

Rename all algorithm-specific ctx to state 2024-06-15 23:13:31 +02:00			`state->h0 = h0;`
			`state->h1 = h1;`
			`state->h2 = h2;`
			`state->h3 = h3;`
			`state->h4 = h4;`
initial import 2024-05-31 12:59:58 +02:00			`}`

			`void`
Rename all algorithm-specific ctx to state 2024-06-15 23:13:31 +02:00			`poly1305_reduce(struct poly1305_state *state,`
			`uint32_t a[POLY1305_TAGLEN_WORDS])`
initial import 2024-05-31 12:59:58 +02:00			`{`
			`uint64_t t0, t1, t2, t3, t4, g0, g1, g2, g3, g4;`
			`uint32_t mask;`

Rename all algorithm-specific ctx to state 2024-06-15 23:13:31 +02:00			`t0 = (state->h0 \| (state->h1 << 26)) & 0xffffffff;`
			`t1 = ((state->h1 >> 6) \| (state->h2 << 20)) & 0xffffffff;`
			`t2 = ((state->h2 >> 12) \| (state->h3 << 14)) & 0xffffffff;`
			`t3 = ((state->h3 >> 18) \| (state->h4 << 8)) & 0xffffffff;`
			`t4 = state->h4 >> 24;`
initial import 2024-05-31 12:59:58 +02:00
			`g0 = t0 + 5;`
			`g1 = t1 + (g0 >> 32);`
			`g2 = t2 + (g1 >> 32);`
			`g3 = t3 + (g2 >> 32);`
impl/poly1305: it's enough to overflow 2^130 Going for 2^136 doesn't give anything extra, and is one additional addition. 2024-06-30 16:05:28 +02:00			`g4 = t4 + (g3 >> 32);`
initial import 2024-05-31 12:59:58 +02:00
impl/poly1305: it's enough to overflow 2^130 Going for 2^136 doesn't give anything extra, and is one additional addition. 2024-06-30 16:05:28 +02:00			`mask = ~(g4 >> 2) + 1;`
initial import 2024-05-31 12:59:58 +02:00
impl/poly1305: add a comment Explain why it's fine to skip clamping while doing t4 t3 t2 t1 t0 + 5. 2024-06-30 16:10:46 +02:00			`/*`
			`* In the case that t4 t3 t2 t1 t0 > 2^130 - 5, g0 thru g3 will`
			`* overflow 32 bits. Given that mask is 32-bits wide, AND-ing it here`
			`* will perform the required clamping.`
			`*/`
initial import 2024-05-31 12:59:58 +02:00			`t0 = (t0 & ~mask) \| (g0 & mask);`
			`t1 = (t1 & ~mask) \| (g1 & mask);`
			`t2 = (t2 & ~mask) \| (g2 & mask);`
			`t3 = (t3 & ~mask) \| (g3 & mask);`

Rename all algorithm-specific ctx to state 2024-06-15 23:13:31 +02:00			`t0 += state->s0;`
			`t1 += state->s1 + (t0 >> 32);`
			`t2 += state->s2 + (t1 >> 32);`
			`t3 += state->s3 + (t2 >> 32);`
initial import 2024-05-31 12:59:58 +02:00
			`a[0] = t0 & 0xffffffff;`
			`a[1] = t1 & 0xffffffff;`
			`a[2] = t2 & 0xffffffff;`
			`a[3] = t3 & 0xffffffff;`
			`}`