/*
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "inner.h"
static void
in_cbc_init(br_sslrec_in_cbc_context *cc,
const br_block_cbcdec_class *bc_impl,
const void *bc_key, size_t bc_key_len,
const br_hash_class *dig_impl,
const void *mac_key, size_t mac_key_len, size_t mac_out_len,
const void *iv)
{
cc->vtable = &br_sslrec_in_cbc_vtable;
cc->seq = 0;
bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
cc->mac_len = mac_out_len;
if (iv == NULL) {
memset(cc->iv, 0, sizeof cc->iv);
cc->explicit_IV = 1;
} else {
memcpy(cc->iv, iv, bc_impl->block_size);
cc->explicit_IV = 0;
}
}
static int
cbc_check_length(const br_sslrec_in_cbc_context *cc, size_t rlen)
{
/*
* Plaintext size: at most 16384 bytes
* Padding: at most 256 bytes
* MAC: mac_len extra bytes
* TLS 1.1+: each record has an explicit IV
*
* Minimum length includes at least one byte of padding, and the
* MAC.
*
* Total length must be a multiple of the block size.
*/
size_t blen;
size_t min_len, max_len;
blen = cc->bc.vtable->block_size;
min_len = (blen + cc->mac_len) & ~(blen - 1);
max_len = (16384 + 256 + cc->mac_len) & ~(blen - 1);
if (cc->explicit_IV) {
min_len += blen;
max_len += blen;
}
return min_len <= rlen && rlen <= max_len;
}
/*
* Rotate array buf[] of length 'len' to the left (towards low indices)
* by 'num' bytes if ctl is 1; otherwise, leave it unchanged. This is
* constant-time. 'num' MUST be lower than 'len'. 'len' MUST be lower
* than or equal to 64.
*/
static void
cond_rotate(uint32_t ctl, unsigned char *buf, size_t len, size_t num)
{
unsigned char tmp[64];
size_t u, v;
for (u = 0, v = num; u < len; u ++) {
tmp[u] = MUX(ctl, buf[v], buf[u]);
if (++ v == len) {
v = 0;
}
}
memcpy(buf, tmp, len);
}
static unsigned char *
cbc_decrypt(br_sslrec_in_cbc_context *cc,
int record_type, unsigned version, void *data, size_t *data_len)
{
/*
* We represent all lengths on 32-bit integers, because:
* -- SSL record lengths always fit in 32 bits;
* -- our constant-time primitives operate on 32-bit integers.
*/
unsigned char *buf;
uint32_t u, v, len, blen, min_len, max_len;
uint32_t good, pad_len, rot_count, len_withmac, len_nomac;
unsigned char tmp1[64], tmp2[64];
int i;
br_hmac_context hc;
buf = data;
len = *data_len;
blen = cc->bc.vtable->block_size;
/*
* Decrypt data, and skip the explicit IV (if applicable). Note
* that the total length is supposed to have been verified by
* the caller. If there is an explicit IV, then we actually
* "decrypt" it using the implicit IV (from previous record),
* which is useless but harmless.
*/
cc->bc.vtable->run(&cc->bc.vtable, cc->iv, data, len);
if (cc->explicit_IV) {
buf += blen;
len -= blen;
}
/*
* Compute minimum and maximum length of plaintext + MAC. These
* lengths can be inferred from the outside: they are not secret.
*/
min_len = (cc->mac_len + 256 < len) ? len - 256 : cc->mac_len;
max_len = len - 1;
/*
* Use the last decrypted byte to compute the actual payload
* length. Take care not to underflow (we use unsigned types).
*/
pad_len = buf[max_len];
good = LE(pad_len, (uint32_t)(max_len - min_len));
len = MUX(good, (uint32_t)(max_len - pad_len), min_len);
/*
* Check padding contents: all padding bytes must be equal to
* the value of pad_len.
*/
for (u = min_len; u < max_len; u ++) {
good &= LT(u, len) | EQ(buf[u], pad_len);
}
/*
* Extract the MAC value. This is done in one pass, but results
* in a "rotated" MAC value depending on where it actually
* occurs. The 'rot_count' value is set to the offset of the
* first MAC byte within tmp1[].
*
* min_len and max_len are also adjusted to the minimum and
* maximum lengths of the plaintext alone (without the MAC).
*/
len_withmac = (uint32_t)len;
len_nomac = len_withmac - cc->mac_len;
min_len -= cc->mac_len;
rot_count = 0;
memset(tmp1, 0, cc->mac_len);
v = 0;
for (u = min_len; u < max_len; u ++) {
tmp1[v] |= MUX(GE(u, len_nomac) & LT(u, len_withmac),
buf[u], 0x00);
rot_count = MUX(EQ(u, len_nomac), v, rot_count);
if (++ v == cc->mac_len) {
v = 0;
}
}
max_len -= cc->mac_len;
/*
* Rotate back the MAC value. The loop below does the constant-time
* rotation in time n*log n for a MAC output of length n. We assume
* that the MAC output length is no more than 64 bytes, so the
* rotation count fits on 6 bits.
*/
for (i = 5; i >= 0; i --) {
uint32_t rc;
rc = (uint32_t)1 << i;
cond_rotate(rot_count >> i, tmp1, cc->mac_len, rc);
rot_count &= ~rc;
}
/*
* Recompute the HMAC value. The input is the concatenation of
* the sequence number (8 bytes), the record header (5 bytes),
* and the payload.
*
* At that point, min_len is the minimum plaintext length, but
* max_len still includes the MAC length.
*/
br_enc64be(tmp2, cc->seq ++);
tmp2[8] = (unsigned char)record_type;
br_enc16be(tmp2 + 9, version);
br_enc16be(tmp2 + 11, len_nomac);
br_hmac_init(&hc, &cc->mac, cc->mac_len);
br_hmac_update(&hc, tmp2, 13);
br_hmac_outCT(&hc, buf, len_nomac, min_len, max_len, tmp2);
/*
* Compare the extracted and recomputed MAC values.
*/
for (u = 0; u < cc->mac_len; u ++) {
good &= EQ0(tmp1[u] ^ tmp2[u]);
}
/*
* Check that the plaintext length is valid. The previous
* check was on the encrypted length, but the padding may have
* turned shorter than expected.
*
* Once this final test is done, the critical "constant-time"
* section ends and we can make conditional jumps again.
*/
good &= LE(len_nomac, 16384);
if (!good) {
return 0;
}
*data_len = len_nomac;
return buf;
}
/* see bearssl_ssl.h */
const br_sslrec_in_cbc_class br_sslrec_in_cbc_vtable = {
{
sizeof(br_sslrec_in_cbc_context),
(int (*)(const br_sslrec_in_class *const *, size_t))
&cbc_check_length,
(unsigned char *(*)(const br_sslrec_in_class **,
int, unsigned, void *, size_t *))
&cbc_decrypt
},
(void (*)(const br_sslrec_in_cbc_class **,
const br_block_cbcdec_class *, const void *, size_t,
const br_hash_class *, const void *, size_t, size_t,
const void *))
&in_cbc_init
};
/*
* For CBC output:
*
* -- With TLS 1.1+, there is an explicit IV. Generation method uses
* HMAC, computed over the current sequence number, and the current MAC
* key. The resulting value is truncated to the size of a block, and
* added at the head of the plaintext; it will get encrypted along with
* the data. This custom generation mechanism is "safe" under the
* assumption that HMAC behaves like a random oracle; since the MAC for
* a record is computed over the concatenation of the sequence number,
* the record header and the plaintext, the HMAC-for-IV will not collide
* with the normal HMAC.
*
* -- With TLS 1.0, for application data, we want to enforce a 1/n-1
* split, as a countermeasure against chosen-plaintext attacks. We thus
* need to leave some room in the buffer for that extra record.
*/
static void
out_cbc_init(br_sslrec_out_cbc_context *cc,
const br_block_cbcenc_class *bc_impl,
const void *bc_key, size_t bc_key_len,
const br_hash_class *dig_impl,
const void *mac_key, size_t mac_key_len, size_t mac_out_len,
const void *iv)
{
cc->vtable = &br_sslrec_out_cbc_vtable;
cc->seq = 0;
bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
cc->mac_len = mac_out_len;
if (iv == NULL) {
memset(cc->iv, 0, sizeof cc->iv);
cc->explicit_IV = 1;
} else {
memcpy(cc->iv, iv, bc_impl->block_size);
cc->explicit_IV = 0;
}
}
static void
cbc_max_plaintext(const br_sslrec_out_cbc_context *cc,
size_t *start, size_t *end)
{
size_t blen, len;
blen = cc->bc.vtable->block_size;
if (cc->explicit_IV) {
*start += blen;
} else {
*start += 4 + ((cc->mac_len + blen + 1) & ~(blen - 1));
}
len = (*end - *start) & ~(blen - 1);
len -= 1 + cc->mac_len;
if (len > 16384) {
len = 16384;
}
*end = *start + len;
}
static unsigned char *
cbc_encrypt(br_sslrec_out_cbc_context *cc,
int record_type, unsigned version, void *data, size_t *data_len)
{
unsigned char *buf, *rbuf;
size_t len, blen, plen;
unsigned char tmp[13];
br_hmac_context hc;
buf = data;
len = *data_len;
blen = cc->bc.vtable->block_size;
/*
* If using TLS 1.0, with more than one byte of plaintext, and
* the record is application data, then we need to compute
* a "split". We do not perform the split on other record types
* because it turned out that some existing, deployed
* implementations of SSL/TLS do not tolerate the splitting of
* some message types (in particular the Finished message).
*
* If using TLS 1.1+, then there is an explicit IV. We produce
* that IV by adding an extra initial plaintext block, whose
* value is computed with HMAC over the record sequence number.
*/
if (cc->explicit_IV) {
/*
* We use here the fact that all the HMAC variants we
* support can produce at least 16 bytes, while all the
* block ciphers we support have blocks of no more than
* 16 bytes. Thus, we can always truncate the HMAC output
* down to the block size.
*/
br_enc64be(tmp, cc->seq);
br_hmac_init(&hc, &cc->mac, blen);
br_hmac_update(&hc, tmp, 8);
br_hmac_out(&hc, buf - blen);
rbuf = buf - blen - 5;
} else {
if (len > 1 && record_type == BR_SSL_APPLICATION_DATA) {
/*
* To do the split, we use a recursive invocation;
* since we only give one byte to the inner call,
* the recursion stops there.
*
* We need to compute the exact size of the extra
* record, so that the two resulting records end up
* being sequential in RAM.
*
* We use here the fact that cbc_max_plaintext()
* adjusted the start offset to leave room for the
* initial fragment.
*/
size_t xlen;
rbuf = buf - 4
- ((cc->mac_len + blen + 1) & ~(blen - 1));
rbuf[0] = buf[0];
xlen = 1;
rbuf = cbc_encrypt(cc, record_type,
version, rbuf, &xlen);
buf ++;
len --;
} else {
rbuf = buf - 5;
}
}
/*
* Compute MAC.
*/
br_enc64be(tmp, cc->seq ++);
tmp[8] = record_type;
br_enc16be(tmp + 9, version);
br_enc16be(tmp + 11, len);
br_hmac_init(&hc, &cc->mac, cc->mac_len);
br_hmac_update(&hc, tmp, 13);
br_hmac_update(&hc, buf, len);
br_hmac_out(&hc, buf + len);
len += cc->mac_len;
/*
* Add padding.
*/
plen = blen - (len & (blen - 1));
memset(buf + len, (unsigned)plen - 1, plen);
len += plen;
/*
* If an explicit IV is used, the corresponding extra block was
* already put in place earlier; we just have to account for it
* here.
*/
if (cc->explicit_IV) {
buf -= blen;
len += blen;
}
/*
* Encrypt the whole thing. If there is an explicit IV, we also
* encrypt it, which is fine (encryption of a uniformly random
* block is still a uniformly random block).
*/
cc->bc.vtable->run(&cc->bc.vtable, cc->iv, buf, len);
/*
* Add the header and return.
*/
buf[-5] = record_type;
br_enc16be(buf - 4, version);
br_enc16be(buf - 2, len);
*data_len = (size_t)((buf + len) - rbuf);
return rbuf;
}
/* see bearssl_ssl.h */
const br_sslrec_out_cbc_class br_sslrec_out_cbc_vtable = {
{
sizeof(br_sslrec_out_cbc_context),
(void (*)(const br_sslrec_out_class *const *,
size_t *, size_t *))
&cbc_max_plaintext,
(unsigned char *(*)(const br_sslrec_out_class **,
int, unsigned, void *, size_t *))
&cbc_encrypt
},
(void (*)(const br_sslrec_out_cbc_class **,
const br_block_cbcenc_class *, const void *, size_t,
const br_hash_class *, const void *, size_t, size_t,
const void *))
&out_cbc_init
};