/* $NetBSD: unicode.cpp,v 1.1.1.1 2016/01/13 18:41:48 christos Exp $ */ // -*- C++ -*- /* Copyright (C) 2002 Free Software Foundation, Inc. Written by Werner Lemberg <wl@gnu.org> This file is part of groff. groff is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. groff is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with groff; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ #include "lib.h" #include "cset.h" #include "stringclass.h" #include "unicode.h" const char *check_unicode_name(const char *u) { if (*u != 'u') return 0; const char *p = ++u; for (;;) { int val = 0; const char *start = p; for (;;) { // only uppercase hex digits allowed if (!csxdigit(*p)) return 0; if (csdigit(*p)) val = val*0x10 + (*p-'0'); else if (csupper(*p)) val = val*0x10 + (*p-'A'+10); else return 0; // biggest Unicode value is U+10FFFF if (val > 0x10FFFF) return 0; p++; if (*p == '\0' || *p == '_') break; } // surrogates not allowed if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF)) return 0; if (val > 0xFFFF) { if (*start == '0') // no leading zeros allowed if > 0xFFFF return 0; } else if (p - start != 4) // otherwise, check for exactly 4 hex digits return 0; if (*p == '\0') break; p++; } return u; } |