commit d95e10600cc0f1d2fd31f848fb38039b67ac2592
Author: Lucas Gabriel Vuotto
Date:   Tue Jan 28 00:24:03 2025 +0000

    Initial import

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2416a67
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+obj/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c6a067a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+PROG=	bt
+NOMAN=	noman
+
+SRCS=	bt.c bcode.c
+
+WARNINGS=	Yes
+
+.include <bsd.prog.mk>
diff --git a/bcode.c b/bcode.c
new file mode 100644
index 0000000..ff333e4
--- /dev/null
+++ b/bcode.c
@@ -0,0 +1,567 @@
+/*
+ * Copyright (c) 2025 Lucas Gabriel Vuotto
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vis.h>
+
+#include "bcode.h"
+
+
+#define MIN(a, b)	((a) < (b) ? (a) : (b))
+
+/* Number of slots added each time a list or dictionary runs out of room. */
+#define GROWTH_FACTOR	16
+
+
+enum bcode_type {
+	BCODE_UNASSIGNED,
+	BCODE_STRING,
+	BCODE_INTEGER,
+	BCODE_LIST,
+	BCODE_DICTIONARY,
+};
+
+struct bcode_string {
+	size_t len;
+	void *data;
+};
+
+struct bcode_list {
+	size_t sz;
+	size_t cap;
+	struct bcode *elems;
+};
+
+struct bcode_dictionary {
+	size_t sz;
+	size_t cap;
+	struct bcode_kv *elems;
+};
+
+struct bcode {
+	enum bcode_type type;
+	union {
+		struct bcode_string s;
+		int64_t i;
+		struct bcode_list l;
+		struct bcode_dictionary d;
+	} value;
+};
+
+/*
+ * This struct is needed because the value isn't a pointer; inlining it in
+ * bcode_dictionary leads to a compiler error because of the use of an
+ * undeclared type.
+ */
+struct bcode_kv {
+	struct bcode_string k;
+	struct bcode v;
+};
+
+
+static int	kv_cmp(const void *, const void *);
+static int	grow_list(struct bcode_list *);
+static int	grow_dictionary(struct bcode_dictionary *);
+static size_t	parse_string(struct bcode_string *, const uint8_t *, size_t);
+static size_t	parse_integer(int64_t *, const uint8_t *, size_t);
+static size_t	parse_list(struct bcode_list *, const uint8_t *, size_t);
+static size_t	parse_dictionary(struct bcode_dictionary *, const uint8_t *,
+		    size_t);
+static size_t	parse_internal(struct bcode *, const uint8_t *, size_t);
+static void	free_internal(struct bcode *);
+
+
+/*
+ * qsort(3) comparator for dictionary entries: orders keys as raw byte
+ * strings, a key that is a proper prefix of another one sorting first.
+ */
+static int
+kv_cmp(const void *ap, const void *bp)
+{
+	const struct bcode_kv *a = ap, *b = bp;
+	size_t min;
+	int r;
+
+	min = MIN(a->k.len, b->k.len);
+	/* Empty keys carry a NULL data pointer, which memcmp must not see. */
+	if (min > 0) {
+		r = memcmp(a->k.data, b->k.data, min);
+		if (r != 0)
+			return r;
+	}
+
+	return a->k.len < b->k.len ? -1 : a->k.len > b->k.len ? 1 : 0;
+}
+
+/*
+ * Make room for one more element in l, growing the zero-filled backing array
+ * by GROWTH_FACTOR slots when it is full. Returns 1 on success, 0 on failure.
+ */
+static int
+grow_list(struct bcode_list *l)
+{
+	void *p;
+	size_t newcap;
+
+	if (l->sz < l->cap)
+		return 1;
+	if (l->cap > SIZE_MAX - GROWTH_FACTOR)
+		return 0;
+	newcap = l->cap + GROWTH_FACTOR;
+
+	p = recallocarray(l->elems, l->cap, newcap, sizeof(*l->elems));
+	if (p == NULL)
+		return 0;
+	l->elems = p;
+	l->cap = newcap;
+
+	return 1;
+}
+
+/* Same as grow_list, for the key/value array of a dictionary. */
+static int
+grow_dictionary(struct bcode_dictionary *d)
+{
+	void *p;
+	size_t newcap;
+
+	if (d->sz < d->cap)
+		return 1;
+	if (d->cap > SIZE_MAX - GROWTH_FACTOR)
+		return 0;
+	newcap = d->cap + GROWTH_FACTOR;
+
+	p = recallocarray(d->elems, d->cap, newcap, sizeof(*d->elems));
+	if (p == NULL)
+		return 0;
+	d->elems = p;
+	d->cap = newcap;
+
+	return 1;
+}
+
+/*
+ * Parse a "<length>:<bytes>" string from the first sz bytes of dp into s,
+ * copying the payload into freshly allocated memory. Returns the number of
+ * bytes consumed, or 0 on error.
+ */
+static size_t
+parse_string(struct bcode_string *s, const uint8_t *dp, size_t sz)
+{
+	size_t consumed = 0, len = 0;
+
+	/* Shortest string is "0:". */
+	if (sz < 2)
+		return 0;
+
+	/* No leading zeros allowed: a '0' must be the whole length. */
+	if (dp[0] == '0' && dp[1] != ':')
+		return 0;
+
+	/*
+	 * dp isn't NUL-terminated and may hold arbitrary bytes, so the length
+	 * is accumulated by hand: strtoumax would skip whitespace, accept a
+	 * sign and could read past the end of the buffer.
+	 */
+	while (consumed < sz && isdigit(dp[consumed])) {
+		size_t digit = dp[consumed] - '0';
+
+		if (len > (SIZE_MAX - digit) / 10)
+			return 0;
+		len = len * 10 + digit;
+		consumed++;
+	}
+	/* At least one digit is required, followed by the ':' separator. */
+	if (consumed == 0 || consumed == sz || dp[consumed] != ':')
+		return 0;
+	consumed++;
+
+	if (len > sz - consumed)
+		return 0;
+
+	if (len > 0) {
+		s->data = malloc(len);
+		if (s->data == NULL)
+			return 0;
+		memcpy(s->data, dp + consumed, len);
+	} else
+		s->data = NULL;
+	s->len = len;
+
+	return consumed + len;
+}
+
+/*
+ * Parse an "i<decimal>e" integer from the first sz bytes of dp into *i.
+ * Leading zeros, "-0" and values outside the int64_t range are rejected.
+ * Returns the number of bytes consumed, or 0 on error.
+ */
+static size_t
+parse_integer(int64_t *i, const uint8_t *dp, size_t sz)
+{
+	size_t consumed = 0, ndigits = 0;
+	uint64_t limit, val = 0;
+	int neg = 0;
+
+	/* Shortest integer is "i0e". */
+	if (sz < 3)
+		return 0;
+
+	if (dp[consumed] != 'i')
+		return 0;
+	consumed++;
+
+	if (dp[consumed] == '-') {
+		neg = 1;
+		consumed++;
+	}
+	/* Largest magnitude: INT64_MAX, or one more for INT64_MIN. */
+	limit = (uint64_t)INT64_MAX + neg;
+
+	/*
+	 * dp isn't NUL-terminated and may hold arbitrary bytes, so the value
+	 * is accumulated by hand instead of calling strtoimax.
+	 */
+	while (consumed < sz && isdigit(dp[consumed])) {
+		uint64_t digit = dp[consumed] - '0';
+
+		/* No leading zeros allowed. */
+		if (ndigits == 1 && val == 0)
+			return 0;
+		if (val > (limit - digit) / 10)
+			return 0;
+		val = val * 10 + digit;
+		ndigits++;
+		consumed++;
+	}
+	if (ndigits == 0 || consumed == sz || dp[consumed] != 'e')
+		return 0;
+	/* No negative zeros allowed. */
+	if (neg && val == 0)
+		return 0;
+	consumed++;
+
+	/* Negate without overflowing when val is INT64_MAX + 1. */
+	*i = neg ? -(int64_t)(val - 1) - 1 : (int64_t)val;
+
+	return consumed;
+}
+
+/*
+ * Parse an "l<elements>e" list from the first sz bytes of dp into l. On
+ * error the elements parsed so far stay in l for the caller to release.
+ * Returns the number of bytes consumed, or 0 on error.
+ */
+static size_t
+parse_list(struct bcode_list *l, const uint8_t *dp, size_t sz)
+{
+	size_t n, consumed = 0;
+
+	/* Shortest list is "le". */
+	if (sz < 2)
+		return 0;
+
+	if (*dp != 'l')
+		return 0;
+	sz--;
+	dp++;
+	consumed++;
+
+	/* Check sz first: dp points past the buffer once sz reaches 0. */
+	while (sz > 0 && *dp != 'e') {
+		if (!grow_list(l))
+			return 0;
+
+		n = parse_internal(&l->elems[l->sz], dp, sz);
+		if (n == 0)
+			return 0;
+		sz -= n;
+		dp += n;
+		consumed += n;
+
+		l->sz++;
+	}
+
+	/* Running out of input means the closing 'e' is missing. */
+	if (sz == 0)
+		return 0;
+	consumed++;
+
+	return consumed;
+}
+
+/*
+ * Parse a "d<key><value>...e" dictionary from the first sz bytes of dp into
+ * d; keys are strings. On error the entries parsed so far stay in d for the
+ * caller to release. Returns the number of bytes consumed, or 0 on error.
+ */
+static size_t
+parse_dictionary(struct bcode_dictionary *d, const uint8_t *dp, size_t sz)
+{
+	size_t n, consumed = 0;
+
+	/* Shortest dictionary is "de". */
+	if (sz < 2)
+		return 0;
+
+	if (*dp != 'd')
+		return 0;
+	sz--;
+	dp++;
+	consumed++;
+
+	/* Check sz first: dp points past the buffer once sz reaches 0. */
+	while (sz > 0 && *dp != 'e') {
+		if (!grow_dictionary(d))
+			return 0;
+
+		n = parse_string(&d->elems[d->sz].k, dp, sz);
+		if (n == 0)
+			return 0;
+		sz -= n;
+		dp += n;
+		consumed += n;
+
+		n = parse_internal(&d->elems[d->sz].v, dp, sz);
+		if (n == 0)
+			return 0;
+		sz -= n;
+		dp += n;
+		consumed += n;
+
+		d->sz++;
+	}
+
+	/* Running out of input means the closing 'e' is missing. */
+	if (sz == 0)
+		return 0;
+	consumed++;
+
+	return consumed;
+}
+
+/*
+ * Parse one value of any type from the first sz bytes of dp into bcode,
+ * dispatching on the first byte. The type is recorded before parsing so
+ * that free_internal can release a partially built value after an error.
+ * Returns the number of bytes consumed, or 0 on error.
+ */
+static size_t
+parse_internal(struct bcode *bcode, const uint8_t *dp, size_t sz)
+{
+	size_t n;
+
+	if (sz == 0)
+		return 0;
+
+	switch (*dp) {
+	case 'i':
+		bcode->type = BCODE_INTEGER;
+		n = parse_integer(&bcode->value.i, dp, sz);
+		break;
+	case 'l':
+		bcode->type = BCODE_LIST;
+		n = parse_list(&bcode->value.l, dp, sz);
+		break;
+	case 'd':
+		bcode->type = BCODE_DICTIONARY;
+		n = parse_dictionary(&bcode->value.d, dp, sz);
+		/* Keep keys in sorted order, whatever the input order. */
+		if (n > 0 && bcode->value.d.sz > 1)
+			qsort(bcode->value.d.elems, bcode->value.d.sz,
+			    sizeof(*bcode->value.d.elems), &kv_cmp);
+		break;
+	default:
+		bcode->type = BCODE_STRING;
+		n = parse_string(&bcode->value.s, dp, sz);
+		break;
+	}
+
+	return n;
+}
+
+/*
+ * Release everything owned by bcode, but not bcode itself. Containers are
+ * walked up to cap rather than sz: a failed parse can leave a half-built
+ * entry in the slot after the last complete one, and unused slots are
+ * zero-filled, so visiting them is harmless.
+ */
+static void
+free_internal(struct bcode *bcode)
+{
+	size_t i;
+
+	switch (bcode->type) {
+	case BCODE_STRING:
+		free(bcode->value.s.data);
+		break;
+	case BCODE_LIST: {
+		struct bcode_list l = bcode->value.l;
+
+		for (i = 0; i < l.cap; i++)
+			free_internal(&l.elems[i]);
+		free(l.elems);
+		break;
+	}
+	case BCODE_DICTIONARY: {
+		struct bcode_dictionary d = bcode->value.d;
+
+		for (i = 0; i < d.cap; i++) {
+			free(d.elems[i].k.data);
+			free_internal(&d.elems[i].v);
+		}
+		free(d.elems);
+		break;
+	}
+	default:
+		/* Nothing to release. */
+		break;
+	}
+}
+
+/*
+ * Parse the sz bytes at dp as a single bencoded value. The whole buffer must
+ * be consumed. Returns a value to be released with bcode_free, or NULL on
+ * allocation failure or malformed input.
+ */
+struct bcode *
+bcode_parse(const uint8_t *dp, size_t sz)
+{
+	struct bcode *bcode;
+	size_t n;
+
+	bcode = calloc(1, sizeof(*bcode));
+	if (bcode == NULL)
+		return NULL;
+
+	n = parse_internal(bcode, dp, sz);
+	/* n == 0 is a parse error; anything short of sz is trailing data. */
+	if (n == 0 || n != sz) {
+		bcode_free(bcode);
+		return NULL;
+	}
+
+	return bcode;
+}
+
+/* Release a value returned by bcode_parse; NULL is accepted and ignored. */
+void
+bcode_free(struct bcode *bcode)
+{
+	if (bcode == NULL)
+		return;
+
+	free_internal(bcode);
+	free(bcode);
+}
+
+/*
+ * Write s to fp between double quotes, encoding each byte with vis(3) so
+ * that binary data can be displayed.
+ */
+static void
+print_string(const struct bcode_string *s, FILE *fp)
+{
+	char buf[5];
+	size_t i;
+	int nextc;
+
+	fputc('"', fp);
+	for (i = 0; i < s->len; i++) {
+		/* vis takes the following character; '"' closes the string. */
+		if (i + 1 == s->len)
+			nextc = '"';
+		else
+			nextc = ((unsigned char *)s->data)[i + 1];
+		vis(buf, ((unsigned char *)s->data)[i],
+		    VIS_TAB|VIS_NL|VIS_CSTYLE, nextc);
+		fputs(buf, fp);
+	}
+	fputc('"', fp);
+}
+
+/*
+ * Write bcode to fp in a JSON-like layout. lvl is the current nesting depth
+ * and controls the indentation of list and dictionary members.
+ */
+static void
+print_internal(const struct bcode *bcode, FILE *fp, size_t lvl)
+{
+	size_t i, k;
+
+	switch (bcode->type) {
+	case BCODE_STRING:
+		print_string(&bcode->value.s, fp);
+		break;
+	case BCODE_INTEGER:
+		fprintf(fp, "%" PRIi64, bcode->value.i);
+		break;
+	case BCODE_LIST: {
+		struct bcode_list l = bcode->value.l;
+
+		fputs("[\n", fp);
+		lvl++;
+		for (i = 0; i < l.sz; i++) {
+			for (k = 0; k < lvl; k++)
+				fputs(" ", fp);
+			print_internal(&l.elems[i], fp, lvl);
+			fputs(",\n", fp);
+		}
+		lvl--;
+		for (k = 0; k < lvl; k++)
+			fputs(" ", fp);
+		fputc(']', fp);
+
+		break;
+	}
+	case BCODE_DICTIONARY: {
+		struct bcode_dictionary d = bcode->value.d;
+
+		fputs("{\n", fp);
+		lvl++;
+		for (i = 0; i < d.sz; i++) {
+			for (k = 0; k < lvl; k++)
+				fputs(" ", fp);
+			print_string(&d.elems[i].k, fp);
+			fputs(": ", fp);
+			print_internal(&d.elems[i].v, fp, lvl);
+			fputs(",\n", fp);
+		}
+		lvl--;
+		for (k = 0; k < lvl; k++)
+			fputs(" ", fp);
+		fputc('}', fp);
+
+		break;
+	}
+	default:
+		/* Do nothing. */
+		break;
+	}
+}
+
+/* Write a human-readable rendering of bcode to fp, ending with a newline. */
+void
+bcode_dump(const struct bcode *bcode, FILE *fp)
+{
+	print_internal(bcode, fp, 0);
+	fputc('\n', fp);
+}
diff --git a/bcode.h b/bcode.h
new file mode 100644
index 0000000..23d51c3
--- /dev/null
+++ b/bcode.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2025 Lucas Gabriel Vuotto
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef BCODE_H
+#define BCODE_H
+
+#include <stdint.h>
+#include <stdio.h>
+
+
+/* Opaque parsed bencode value; see bcode.c for the layout. */
+struct bcode;
+
+
+/*
+ * bcode_parse returns NULL on error; a non-NULL result is released with
+ * bcode_free, which accepts NULL. bcode_dump writes a readable rendering.
+ */
+struct bcode *	bcode_parse(const uint8_t *, size_t);
+void		bcode_free(struct bcode *);
+void		bcode_dump(const struct bcode *, FILE *);
+
+#endif /* BCODE_H */
diff --git a/bt.c b/bt.c
new file mode 100644
index 0000000..e0a5e9a
--- /dev/null
+++ b/bt.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2025 Lucas Gabriel Vuotto
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <err.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "bcode.h"
+
+
+/* Torrent dumped when no path is given on the command line. */
+#define DEFAULT_PATH	"/tmp/af24ae3037cfc1ad8226b159103a148fbc81b173.torrent"
+
+
+/*
+ * Read a bencoded file of at most 256 KiB, parse it and dump it to stdout.
+ * The file is argv[1] when given, DEFAULT_PATH otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct bcode *bcode;
+	uint8_t buf[256 * 1024];
+	const char *path;
+	FILE *fp;
+	size_t n;
+
+	path = argc > 1 ? argv[1] : DEFAULT_PATH;
+
+	fp = fopen(path, "r");
+	if (fp == NULL)
+		err(1, "fopen");
+	n = fread(buf, 1, sizeof(buf), fp);
+	if (ferror(fp))
+		err(1, "fread");
+	/* No EOF means buf filled up before the end of the file was seen. */
+	if (!feof(fp))
+		errx(1, "short read");
+
+	bcode = bcode_parse(buf, n);
+	if (bcode == NULL)
+		errx(1, "bcode_parse: parse error");
+	bcode_dump(bcode, stdout);
+	bcode_free(bcode);
+
+	(void)fclose(fp);
+
+	return 0;
+}