/* $NetBSD: midna_domain.c,v 1.2 2017/02/14 01:16:49 christos Exp $ */
/*++
/* NAME
/* midna_domain 3
/* SUMMARY
/* ASCII/UTF-8 domain name conversion
/* SYNOPSIS
/* #include <midna_domain.h>
/*
/* int midna_domain_cache_size;
/*
/* const char *midna_domain_to_ascii(
/* const char *name)
/*
/* const char *midna_domain_to_utf8(
/* const char *name)
/*
/* const char *midna_domain_suffix_to_ascii(
/* const char *name)
/*
/* const char *midna_domain_suffix_to_utf8(
/* const char *name)
/* DESCRIPTION
/* The functions in this module transform domain names from/to
/* ASCII and UTF-8 form. The result is cached to avoid repeated
/* conversion.
/*
/* This module builds on the ICU library implementation of the
/* UTS #46 specification, using default ICU library options
/* because those are likely best tested: with transitional
/* processing, with case mapping, with normalization, with
/* limited IDNA2003 compatibility, without STD3 ASCII rules.
/*
/* midna_domain_to_ascii() converts an UTF-8 or ASCII domain
/* name to ASCII. The result is a null pointer in case of
/* error. This function verifies that the result passes
/* valid_hostname().
/*
/* midna_domain_to_utf8() converts an UTF-8 or ASCII domain
/* name to UTF-8. The result is a null pointer in case of
/* error. This function verifies that the result, after
/* conversion to ASCII, passes valid_hostname().
/*
/* midna_domain_suffix_to_ascii() and midna_domain_suffix_to_utf8()
/* take a name that starts with '.' and otherwise perform the
/* same operations as midna_domain_to_ascii() and
/* midna_domain_to_utf8().
/*
/* midna_domain_cache_size specifies the size of the conversion
/* result cache. This value is used only once, upon the first
/* lookup
/* request.
/* SEE ALSO
/* http://unicode.org/reports/tr46/ Unicode IDNA Compatibility processing
/* msg(3) diagnostics interface
/* DIAGNOSTICS
/* Fatal errors: memory allocation problem.
/* Warnings: conversion error or result validation error.
/* LICENSE
/* .ad
/* .fi
/* The Secure Mailer license must be distributed with this software.
/* AUTHOR(S)
/* Arnt Gulbrandsen
/*
/* Wietse Venema
/* IBM T.J. Watson Research
/* P.O. Box 704
/* Yorktown Heights, NY 10598, USA
/*--*/
/*
* System library.
*/
#include <sys_defs.h>
#include <string.h>
#include <ctype.h>
#ifndef NO_EAI
#include <unicode/uidna.h>
/*
* Utility library.
*/
#include <mymalloc.h>
#include <msg.h>
#include <ctable.h>
#include <stringops.h>
#include <valid_hostname.h>
#include <midna_domain.h>
/*
* Application-specific.
*/
#define DEF_MIDNA_CACHE_SIZE 256
int midna_domain_cache_size = DEF_MIDNA_CACHE_SIZE;
static VSTRING *midna_domain_buf; /* x.suffix */
#define STR(x) vstring_str(x)
/* midna_domain_to_ascii_create - convert domain to ASCII */
static void *midna_domain_to_ascii_create(const char *name, void *unused_context)
{
static const char myname[] = "midna_domain_to_ascii_create";
char buf[1024]; /* XXX */
UErrorCode error = U_ZERO_ERROR;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
UIDNA *idna;
int anl;
/*
* Paranoia: do not expose uidna_*() to unfiltered network data.
*/
if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
myname, name, "malformed UTF-8");
return (0);
}
/*
* Perform the requested conversion.
*/
idna = uidna_openUTS46(UIDNA_DEFAULT, &error);/* XXX check error */
anl = uidna_nameToASCII_UTF8(idna,
name, strlen(name),
buf, sizeof(buf) - 1,
&info,
&error);
uidna_close(idna);
/*
* Paranoia: verify that the result passes valid_hostname(). A quick
* check shows that UTS46 ToASCII by default rejects inputs with labels
* that start or end in '-', with names or labels that are over-long, or
* "fake" A-labels, as required by UTS 46 section 4.1, but we rely on
* valid_hostname() on the output side just to be sure.
*/
if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
buf[anl] = 0; /* XXX */
if (!valid_hostname(buf, DONT_GRIPE)) {
msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
myname, name, "malformed ASCII label(s)");
return (0);
}
return (mystrndup(buf, anl));
} else {
msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
myname, name, u_errorName(info.errors));
return (0);
}
}
/* midna_domain_to_utf8_create - convert domain to UTF8 */
static void *midna_domain_to_utf8_create(const char *name, void *unused_context)
{
static const char myname[] = "midna_domain_to_utf8_create";
char buf[1024]; /* XXX */
UErrorCode error = U_ZERO_ERROR;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
UIDNA *idna;
int anl;
/*
* Paranoia: do not expose uidna_*() to unfiltered network data.
*/
if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s",
myname, name, "malformed UTF-8");
return (0);
}
/*
* Perform the requested conversion.
*/
idna = uidna_openUTS46(UIDNA_DEFAULT, &error);/* XXX check error */
anl = uidna_nameToUnicodeUTF8(idna,
name, strlen(name),
buf, sizeof(buf) - 1,
&info,
&error);
uidna_close(idna);
/*
* Paranoia: UTS46 toUTF8 by default accepts and produces an over-long
* name or a name that contains an over-long NR-LDH label (and perhaps
* other invalid forms that are not covered in UTS 46, section 4.1). We
* rely on midna_domain_to_ascii() to validate the output.
*/
if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
buf[anl] = 0; /* XXX */
if (midna_domain_to_ascii(buf) == 0)
return (0);
return (mystrndup(buf, anl));
} else {
msg_warn("%s: Problem translating domain \"%.100s\" to UTF8 form: %s",
myname, name, u_errorName(info.errors));
return (0);
}
}
/* midna_domain_cache_free - cache element destructor */
static void midna_domain_cache_free(void *value, void *unused_context)
{
if (value)
myfree(value);
}
/* midna_domain_to_ascii - convert name to ASCII */
const char *midna_domain_to_ascii(const char *name)
{
static CTABLE *midna_domain_to_ascii_cache = 0;
if (midna_domain_to_ascii_cache == 0)
midna_domain_to_ascii_cache = ctable_create(midna_domain_cache_size,
midna_domain_to_ascii_create,
midna_domain_cache_free,
(void *) 0);
return (ctable_locate(midna_domain_to_ascii_cache, name));
}
/* midna_domain_to_utf8 - convert name to UTF8 */
const char *midna_domain_to_utf8(const char *name)
{
static CTABLE *midna_domain_to_utf8_cache = 0;
if (midna_domain_to_utf8_cache == 0)
midna_domain_to_utf8_cache = ctable_create(midna_domain_cache_size,
midna_domain_to_utf8_create,
midna_domain_cache_free,
(void *) 0);
return (ctable_locate(midna_domain_to_utf8_cache, name));
}
/* midna_domain_suffix_to_ascii - convert .name to ASCII */
const char *midna_domain_suffix_to_ascii(const char *suffix)
{
const char *cache_res;
/*
* If prepending x to .name causes the result to become too long, then
* the suffix is bad.
*/
if (midna_domain_buf == 0)
midna_domain_buf = vstring_alloc(100);
vstring_sprintf(midna_domain_buf, "x%s", suffix);
if ((cache_res = midna_domain_to_ascii(STR(midna_domain_buf))) == 0)
return (0);
else
return (cache_res + 1);
}
/* midna_domain_suffix_to_utf8 - convert .name to UTF8 */
const char *midna_domain_suffix_to_utf8(const char *name)
{
const char *cache_res;
/*
* If prepending x to .name causes the result to become too long, then
* the suffix is bad.
*/
if (midna_domain_buf == 0)
midna_domain_buf = vstring_alloc(100);
vstring_sprintf(midna_domain_buf, "x%s", name);
if ((cache_res = midna_domain_to_utf8(STR(midna_domain_buf))) == 0)
return (0);
else
return (cache_res + 1);
}
#ifdef TEST
/*
* Test program - reads names from stdin, reports invalid names to stderr.
*/
#include <stdlib.h>
#include <locale.h>
#include <stringops.h> /* XXX util_utf8_enable */
#include <vstring.h>
#include <vstream.h>
#include <vstring_vstream.h>
#include <msg_vstream.h>
int main(int argc, char **argv)
{
VSTRING *buffer = vstring_alloc(1);
const char *bp;
const char *ascii;
const char *utf8;
if (setlocale(LC_ALL, "C") == 0)
msg_fatal("setlocale(LC_ALL, C) failed: %m");
msg_vstream_init(argv[0], VSTREAM_ERR);
/* msg_verbose = 1; */
util_utf8_enable = 1;
while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
bp = STR(buffer);
msg_info("> %s", bp);
while (ISSPACE(*bp))
bp++;
if (*bp == '#' || *bp == 0)
continue;
msg_info("unconditional conversions:");
utf8 = midna_domain_to_utf8(bp);
msg_info("\"%s\" ->utf8 \"%s\"", bp, utf8 ? utf8 : "(error)");
ascii = midna_domain_to_ascii(bp);
msg_info("\"%s\" ->ascii \"%s\"", bp, ascii ? ascii : "(error)");
msg_info("conditional conversions:");
if (!allascii(bp)) {
if (ascii != 0) {
utf8 = midna_domain_to_utf8(ascii);
msg_info("\"%s\" ->ascii \"%s\" ->utf8 \"%s\"",
bp, ascii, utf8 ? utf8 : "(error)");
if (utf8 != 0) {
if (strcmp(utf8, bp) != 0)
msg_warn("\"%s\" != \"%s\"", bp, utf8);
}
}
} else {
if (utf8 != 0) {
ascii = midna_domain_to_ascii(utf8);
msg_info("\"%s\" ->utf8 \"%s\" ->ascii \"%s\"",
bp, utf8, ascii ? ascii : "(error)");
if (ascii != 0) {
if (strcmp(ascii, bp) != 0)
msg_warn("\"%s\" != \"%s\"", bp, ascii);
}
}
}
}
exit(0);
}
#endif /* TEST */
#endif /* NO_EAI */