/* * Copyright (c) 2024 Lucas Gabriel Vuotto * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include "impl_poly1305.h" #include "util.h" /* * Poly1305 implementation. * * Poly1305 originally designed by Daniel J. Bernstein, "The Poly1305-AES * message-authentication code", https://cr.yp.to/mac/poly1305-20050329.pdf . * * This implementation is written from scratch, but consulting poly1305-donna * by Andrew Moon, https://github.com/floodyberry/poly1305-donna, released * under MIT license. Similarities are to be expected. */ /* * Copyright 2011-2016 Andrew Moon * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ /* * To ease reduction modulo p = 2^130 - 5, work in base 2^130, as 2^130 = 5 mod * p, allowing for easier operations. 2^130 splits evenly into 5 limbs of 26 * bits. * * Addition is performed limb-wise: * * h = h4 h3 h2 h1 h0 * c = c4 c3 c2 c1 c0 * ----------------------------------- * h+c = h4+c4 h3+c3 h2+c2 h1+c1 h0+c0 * * Carry won't be propagated at this step. * * Considering h = h + c, multiplication is performed as school multiplication * / long multiplication: * * h = h4 h3 h2 h1 h0 * r = r4 r3 r2 r1 r0 * ----------------------------------------------------------- * h4*r0 h3*r0 h2*r0 h1*r0 h0*r0 * h4*r1 h3*r1 h2*r1 h1*r1 h0*r1 * h4*r2 h3*r2 h2*r2 h1*r2 h0*r2 * h4*r3 h3*r3 h2*r3 h1*r3 h0*r3 * h4*r4 h3*r4 h2*r4 h1*r4 h0*r4 * * Each hn*rn fits in 53 bits. Carry won't be propagated at this step. 
Partial * reduction modulo p starts here: * * 2^130 * h = | h4 h3 h2 h1 h0 * r = | r4 r3 r2 r1 r0 * ------------------------------|------------------------------ * | h4*r0 h3*r0 h2*r0 h1*r0 h0*r0 * h4*r1 | h3*r1 h2*r1 h1*r1 h0*r1 * h4*r2 h3*r2 | h2*r2 h1*r2 h0*r2 * h4*r3 h3*r3 h2*r3 | h1*r3 h0*r3 * h4*r4 h3*r4 h2*r4 h1*r4 | h0*r4 * * 2^130 * h = | h4 h3 h2 h1 h0 * r = | r4 r3 r2 r1 r0 * --------|-------------------------------------- * | h4*r0 h3*r0 h2*r0 h1*r0 h0*r0 * | h3*r1 h2*r1 h1*r1 h0*r1 5*h4*r1 * | h2*r2 h1*r2 h0*r2 5*h4*r2 5*h3*r2 * | h1*r3 h0*r3 5*h4*r3 5*h3*r3 5*h2*r3 * | h0*r4 5*h4*r4 5*h3*r4 5*h2*r4 5*h1*r4 * --------|-------------------------------------- * h*r = | t4 t3 t2 t1 t0 * * All the carry propagations are performed after this step. h0 is set to the low * 26 bits of t0; h1 through h4 are set to tn + (tn-1 >> 26) to propagate the * carry. t4 might overflow so it needs to be backpropagated to h0 and h1. h1 * won't carry into h2: given the highest possible h, c, and r, * * h = 0xffffffffffffffffffffffffffffffff * c = 0x1ffffffffffffffffffffffffffffffff * r = 0x0ffffffc0ffffffc0ffffffc0fffffff * * the limbs and t4 before h0 and h1 second propagation are * * h4 = 0x257ffff * h3 = 0x3a95fff * h2 = 0x3fea57f * h1 = 0x3fffa70 * h0 = 0x2000002 * t4 = 0x77fffffa57ffff * * which becomes * * h4 = 0x257ffff * h3 = 0x3a95fff * h2 = 0x3fea57f * h1 = 0x3fffa95 * h0 = 0x3fffff8 * * To perform the final reduction modulo p, observe that each hn is bound by * 2^26, which means that h is bound by 2^130. Define minusp = 2^136 - p. * - If h < p, minusp + h < 2^136. * - If h >= p, then h = p + k with k in {0,1,2,3,4}, and minusp + h = * 2^136 - p + p + k = 2^136 + k >= 2^136, and both minusp + h = k mod 2^136 * and h = k mod p for all possible values of k. * * To avoid information leaking via side channels, define g = minusp + h, and * select g if bit 136 is set, h otherwise. In particular, define a 32-bit * mask = ~(g >> 136) + 1. 
* - If bit 136 of g is 1, mask = ~1 + 1 = 0xffffffff. * - If bit 136 of g is 0, mask = ~0 + 1 = 0. * Then perform (h & ~mask) | (g & mask). */ void poly1305_block(struct poly1305_state *state, uint32_t hibit) { uint64_t h0, h1, h2, h3, h4, t0, t1, t2, t3, t4; uint32_t r0, r1, r2, r3, r4, x1, x2, x3, x4; h0 = state->h0; h1 = state->h1; h2 = state->h2; h3 = state->h3; h4 = state->h4; r0 = state->r0; r1 = state->r1; r2 = state->r2; r3 = state->r3; r4 = state->r4; x1 = state->x1; x2 = state->x2; x3 = state->x3; x4 = state->x4; t0 = load32le(&state->m[0]); t1 = load32le(&state->m[4]); t2 = load32le(&state->m[8]); t3 = load32le(&state->m[12]); t4 = hibit; h0 += t0 & 0x3ffffff; h1 += ((t1 << 6) | (t0 >> 26)) & 0x3ffffff; h2 += ((t2 << 12) | (t1 >> 20)) & 0x3ffffff; h3 += ((t3 << 18) | (t2 >> 14)) & 0x3ffffff; h4 += (t4 << 24) | (t3 >> 8); t0 = h0 * r0 + h4 * x1 + h3 * x2 + h2 * x3 + h1 * x4; t1 = h1 * r0 + h0 * r1 + h4 * x2 + h3 * x3 + h2 * x4; t2 = h2 * r0 + h1 * r1 + h0 * r2 + h4 * x3 + h3 * x4; t3 = h3 * r0 + h2 * r1 + h1 * r2 + h0 * r3 + h4 * x4; t4 = h4 * r0 + h3 * r1 + h2 * r2 + h1 * r3 + h0 * r4; h0 = t0 & 0x3ffffff; t1 += t0 >> 26; h1 = t1 & 0x3ffffff; t2 += t1 >> 26; h2 = t2 & 0x3ffffff; t3 += t2 >> 26; h3 = t3 & 0x3ffffff; t4 += t3 >> 26; h4 = t4 & 0x3ffffff; h0 += 5 * (t4 >> 26); h1 += h0 >> 26; h0 &= 0x3ffffff; state->h0 = h0; state->h1 = h1; state->h2 = h2; state->h3 = h3; state->h4 = h4; } void poly1305_reduce(struct poly1305_state *state, uint32_t a[POLY1305_TAGLEN_WORDS]) { uint64_t t0, t1, t2, t3, t4, g0, g1, g2, g3, g4; uint32_t mask; t0 = (state->h0 | (state->h1 << 26)) & 0xffffffff; t1 = ((state->h1 >> 6) | (state->h2 << 20)) & 0xffffffff; t2 = ((state->h2 >> 12) | (state->h3 << 14)) & 0xffffffff; t3 = ((state->h3 >> 18) | (state->h4 << 8)) & 0xffffffff; t4 = state->h4 >> 24; g0 = t0 + 5; g1 = t1 + (g0 >> 32); g2 = t2 + (g1 >> 32); g3 = t3 + (g2 >> 32); g4 = t4 + (g3 >> 32) + 252; mask = ~(g4 >> 8) + 1; t0 = (t0 & ~mask) | (g0 & mask); t1 = (t1 & 
~mask) | (g1 & mask); t2 = (t2 & ~mask) | (g2 & mask); t3 = (t3 & ~mask) | (g3 & mask); t0 += state->s0; t1 += state->s1 + (t0 >> 32); t2 += state->s2 + (t1 >> 32); t3 += state->s3 + (t2 >> 32); a[0] = t0 & 0xffffffff; a[1] = t1 & 0xffffffff; a[2] = t2 & 0xffffffff; a[3] = t3 & 0xffffffff; }