<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> <meta http-equiv="X-UA-Compatible" content="IE=9"/> <meta name="generator" content="Doxygen 1.8.12"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <title>libcbor: src/cbor/internal/unicode.c Source File</title> <link href="tabs.css" rel="stylesheet" type="text/css"/> <script type="text/javascript" src="jquery.js"></script> <script type="text/javascript" src="dynsections.js"></script> <link href="search/search.css" rel="stylesheet" type="text/css"/> <script type="text/javascript" src="search/searchdata.js"></script> <script type="text/javascript" src="search/search.js"></script> <link href="doxygen.css" rel="stylesheet" type="text/css" /> <link href="customdoxygen.css" rel="stylesheet" type="text/css"/> </head> <body> <div id="top"><!-- do not remove this div, it is closed by doxygen! 
--> <div id="titlearea"> <table cellspacing="0" cellpadding="0"> <tbody> <tr style="height: 56px;"> <td id="projectalign" style="padding-left: 0.5em;"> <div id="projectname">libcbor  <span id="projectnumber">0.5.0</span> </div> <div id="projectbrief">libcbor is a C library for parsing and generating CBOR, the general-purpose schema-less binary data format.</div> </td> </tr> </tbody> </table> </div> <!-- end header part --> <!-- Generated by Doxygen 1.8.12 --> <script type="text/javascript"> var searchBox = new SearchBox("searchBox", "search",false,'Search'); </script> <script type="text/javascript" src="menudata.js"></script> <script type="text/javascript" src="menu.js"></script> <script type="text/javascript"> $(function() { initMenu('',true,false,'search.php','Search'); $(document).ready(function() { init_search(); }); }); </script> <div id="main-nav"></div> <!-- window showing the filter options --> <div id="MSearchSelectWindow" onmouseover="return searchBox.OnSearchSelectShow()" onmouseout="return searchBox.OnSearchSelectHide()" onkeydown="return searchBox.OnSearchSelectKey(event)"> </div> <!-- iframe showing the search results (closed by default) --> <div id="MSearchResultsWindow"> <iframe src="javascript:void(0)" frameborder="0" name="MSearchResults" id="MSearchResults"> </iframe> </div> <div id="nav-path" class="navpath"> <ul> <li class="navelem"><a class="el" href="dir_68267d1309a1af8e8297ef4c3efbcdba.html">src</a></li><li class="navelem"><a class="el" href="dir_cb565ba51d82ea933604984cbab6233d.html">cbor</a></li><li class="navelem"><a class="el" href="dir_3ebe94c85f6786bf802771098c1d09bb.html">internal</a></li> </ul> </div> </div><!-- top --> <div class="header"> <div class="headertitle"> <div class="title">unicode.c</div> </div> </div><!--header--> <div class="contents"> <a href="unicode_8c.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno"> 1</span> <span 
class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno"> 2</span> <span class="comment"> * Copyright (c) 2014-2017 Pavel Kalvoda &lt;me@pavelkalvoda.com&gt;</span></div><div class="line"><a name="l00003"></a><span class="lineno"> 3</span> <span class="comment"> *</span></div><div class="line"><a name="l00004"></a><span class="lineno"> 4</span> <span class="comment"> * libcbor is free software; you can redistribute it and/or modify</span></div><div class="line"><a name="l00005"></a><span class="lineno"> 5</span> <span class="comment"> * it under the terms of the MIT license. See LICENSE for details.</span></div><div class="line"><a name="l00006"></a><span class="lineno"> 6</span> <span class="comment"> */</span></div><div class="line"><a name="l00007"></a><span class="lineno"> 7</span> </div><div class="line"><a name="l00008"></a><span class="lineno"> 8</span> <span class="preprocessor">#include "<a class="code" href="unicode_8h.html">unicode.h</a>"</span></div><div class="line"><a name="l00009"></a><span class="lineno"> 9</span> </div><div class="line"><a name="l00010"></a><span class="lineno"><a class="line" href="unicode_8c.html#a82b09bd7c24e408c73e16db56b8db6cf"> 10</a></span> <span class="preprocessor">#define UTF8_ACCEPT 0</span></div><div class="line"><a name="l00011"></a><span class="lineno"><a class="line" href="unicode_8c.html#a2e385a04dd7c4529414ab34a75a4f9ef"> 11</a></span> <span class="preprocessor">#define UTF8_REJECT 1</span></div><div class="line"><a name="l00012"></a><span class="lineno"> 12</span> </div><div class="line"><a name="l00013"></a><span class="lineno"> 13</span> <span class="keyword">static</span> <span class="keyword">const</span> uint8_t utf8d[] = {</div><div class="line"><a name="l00014"></a><span class="lineno"> 14</span>  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, <span class="comment">/* 00..1f */</span></div><div class="line"><a name="l00015"></a><span class="lineno"> 15</span>  
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, <span class="comment">/* 20..3f */</span></div><div class="line"><a name="l00016"></a><span class="lineno"> 16</span>  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, <span class="comment">/* 40..5f */</span></div><div class="line"><a name="l00017"></a><span class="lineno"> 17</span>  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, <span class="comment">/* 60..7f */</span></div><div class="line"><a name="l00018"></a><span class="lineno"> 18</span>  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, <span class="comment">/* 80..9f */</span></div><div class="line"><a name="l00019"></a><span class="lineno"> 19</span>  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, <span class="comment">/* a0..bf */</span></div><div class="line"><a name="l00020"></a><span class="lineno"> 20</span>  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, <span class="comment">/* c0..df */</span></div><div class="line"><a name="l00021"></a><span class="lineno"> 21</span>  0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, <span class="comment">/* e0..ef */</span></div><div class="line"><a name="l00022"></a><span class="lineno"> 22</span>  0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, <span class="comment">/* f0..ff */</span></div><div class="line"><a name="l00023"></a><span class="lineno"> 23</span>  0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, <span class="comment">/* s0..s0 */</span></div><div class="line"><a name="l00024"></a><span class="lineno"> 24</span>  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, <span class="comment">/* s1..s2 */</span></div><div class="line"><a name="l00025"></a><span class="lineno"> 25</span>  1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, <span class="comment">/* s3..s4 */</span></div><div class="line"><a name="l00026"></a><span 
class="lineno"> 26</span>  1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, <span class="comment">/* s5..s6 */</span></div><div class="line"><a name="l00027"></a><span class="lineno"> 27</span>  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, <span class="comment">/* s7..s8 */</span></div><div class="line"><a name="l00028"></a><span class="lineno"> 28</span> };</div><div class="line"><a name="l00029"></a><span class="lineno"> 29</span> </div><div class="line"><a name="l00030"></a><span class="lineno"> 30</span> <span class="comment">/* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann &lt;bjoern@hoehrmann.de&gt; */</span></div><div class="line"><a name="l00031"></a><span class="lineno"> 31</span> <span class="comment">/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */</span></div><div class="line"><a name="l00032"></a><span class="lineno"><a class="line" href="unicode_8c.html#aea748174291fa35e154c69e7669f199b"> 32</a></span> uint32_t <a class="code" href="unicode_8c.html#aea748174291fa35e154c69e7669f199b">_cbor_unicode_decode</a>(uint32_t* state, uint32_t* codep, uint32_t byte) {</div><div class="line"><a name="l00033"></a><span class="lineno"> 33</span>  uint32_t type = utf8d[byte];</div><div class="line"><a name="l00034"></a><span class="lineno"> 34</span> </div><div class="line"><a name="l00035"></a><span class="lineno"> 35</span>  *codep = (*state != <a class="code" href="unicode_8c.html#a82b09bd7c24e408c73e16db56b8db6cf">UTF8_ACCEPT</a>) ?</div><div class="line"><a name="l00036"></a><span class="lineno"> 36</span>  (byte &amp; 0x3fu) | (*codep &lt;&lt; 6) :</div><div class="line"><a name="l00037"></a><span class="lineno"> 37</span>  (0xff &gt;&gt; type) &amp; (byte);</div><div class="line"><a name="l00038"></a><span class="lineno"> 38</span> </div><div class="line"><a name="l00039"></a><span class="lineno"> 39</span>  *state = utf8d[256 + *state * 16 + type];</div><div class="line"><a name="l00040"></a><span class="lineno"> 
40</span>  <span class="keywordflow">return</span> *state;</div><div class="line"><a name="l00041"></a><span class="lineno"> 41</span> }</div><div class="line"><a name="l00042"></a><span class="lineno"> 42</span> </div><div class="line"><a name="l00043"></a><span class="lineno"><a class="line" href="unicode_8h.html#ade9ebfe3d9b9bfad4c9fd5c158d519a8"> 43</a></span> <span class="keywordtype">size_t</span> <a class="code" href="unicode_8c.html#ade9ebfe3d9b9bfad4c9fd5c158d519a8">_cbor_unicode_codepoint_count</a>(<a class="code" href="data_8h.html#a121c5944682215e742475f12f07c0a72">cbor_data</a> source, <span class="keywordtype">size_t</span> source_length, <span class="keyword">struct</span> <a class="code" href="struct__cbor__unicode__status.html">_cbor_unicode_status</a> * status)</div><div class="line"><a name="l00044"></a><span class="lineno"> 44</span> {</div><div class="line"><a name="l00045"></a><span class="lineno"> 45</span>  *status = (<span class="keyword">struct </span><a class="code" href="struct__cbor__unicode__status.html">_cbor_unicode_status</a>) { .<a class="code" href="struct__cbor__unicode__status.html#a74b5cdcf18d76a2cdb19c37857f170a7">location</a> = 0, .status = <a class="code" href="unicode_8h.html#a5f58283b505b6cc8a5646516ae20831faf42cbfe1ac853288773814920e16318c">_CBOR_UNICODE_OK</a> };</div><div class="line"><a name="l00046"></a><span class="lineno"> 46</span>  uint32_t codepoint, state = <a class="code" href="unicode_8c.html#a82b09bd7c24e408c73e16db56b8db6cf">UTF8_ACCEPT</a>, res;</div><div class="line"><a name="l00047"></a><span class="lineno"> 47</span>  <span class="keywordtype">size_t</span> pos = 0, count = 0;</div><div class="line"><a name="l00048"></a><span class="lineno"> 48</span> </div><div class="line"><a name="l00049"></a><span class="lineno"> 49</span>  <span class="keywordflow">for</span> (; pos &lt; source_length; pos++)</div><div class="line"><a name="l00050"></a><span class="lineno"> 50</span>  {</div><div class="line"><a 
name="l00051"></a><span class="lineno"> 51</span>  res = <a class="code" href="unicode_8c.html#aea748174291fa35e154c69e7669f199b">_cbor_unicode_decode</a>(&amp;state, &amp;codepoint, source[pos]);</div><div class="line"><a name="l00052"></a><span class="lineno"> 52</span> </div><div class="line"><a name="l00053"></a><span class="lineno"> 53</span>  <span class="keywordflow">if</span> (res == <a class="code" href="unicode_8c.html#a82b09bd7c24e408c73e16db56b8db6cf">UTF8_ACCEPT</a>) {</div><div class="line"><a name="l00054"></a><span class="lineno"> 54</span>  count++;</div><div class="line"><a name="l00055"></a><span class="lineno"> 55</span>  } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (res == <a class="code" href="unicode_8c.html#a2e385a04dd7c4529414ab34a75a4f9ef">UTF8_REJECT</a>) {</div><div class="line"><a name="l00056"></a><span class="lineno"> 56</span>  <span class="keywordflow">goto</span> error;</div><div class="line"><a name="l00057"></a><span class="lineno"> 57</span>  }</div><div class="line"><a name="l00058"></a><span class="lineno"> 58</span>  }</div><div class="line"><a name="l00059"></a><span class="lineno"> 59</span> </div><div class="line"><a name="l00060"></a><span class="lineno"> 60</span>  <span class="comment">/* Unfinished multibyte codepoint */</span></div><div class="line"><a name="l00061"></a><span class="lineno"> 61</span>  <span class="keywordflow">if</span> (state != <a class="code" href="unicode_8c.html#a82b09bd7c24e408c73e16db56b8db6cf">UTF8_ACCEPT</a>)</div><div class="line"><a name="l00062"></a><span class="lineno"> 62</span>  <span class="keywordflow">goto</span> error;</div><div class="line"><a name="l00063"></a><span class="lineno"> 63</span> </div><div class="line"><a name="l00064"></a><span class="lineno"> 64</span>  <span class="keywordflow">return</span> count;</div><div class="line"><a name="l00065"></a><span class="lineno"> 65</span> </div><div class="line"><a name="l00066"></a><span class="lineno"> 
66</span>  error:</div><div class="line"><a name="l00067"></a><span class="lineno"> 67</span>  *status = (<span class="keyword">struct </span><a class="code" href="struct__cbor__unicode__status.html">_cbor_unicode_status</a>) { .<a class="code" href="struct__cbor__unicode__status.html#a74b5cdcf18d76a2cdb19c37857f170a7">location</a> = pos, .status = <a class="code" href="unicode_8h.html#a5f58283b505b6cc8a5646516ae20831fa7ddb42c4f16ebd6d46fe1e331c643c8b">_CBOR_UNICODE_BADCP</a> };</div><div class="line"><a name="l00068"></a><span class="lineno"> 68</span>  <span class="keywordflow">return</span> -1;</div><div class="line"><a name="l00069"></a><span class="lineno"> 69</span> }</div><div class="ttc" id="unicode_8h_html"><div class="ttname"><a href="unicode_8h.html">unicode.h</a></div></div> <div class="ttc" id="unicode_8c_html_a2e385a04dd7c4529414ab34a75a4f9ef"><div class="ttname"><a href="unicode_8c.html#a2e385a04dd7c4529414ab34a75a4f9ef">UTF8_REJECT</a></div><div class="ttdeci">#define UTF8_REJECT</div><div class="ttdef"><b>Definition:</b> <a href="unicode_8c_source.html#l00011">unicode.c:11</a></div></div> <div class="ttc" id="unicode_8c_html_aea748174291fa35e154c69e7669f199b"><div class="ttname"><a href="unicode_8c.html#aea748174291fa35e154c69e7669f199b">_cbor_unicode_decode</a></div><div class="ttdeci">uint32_t _cbor_unicode_decode(uint32_t *state, uint32_t *codep, uint32_t byte)</div><div class="ttdef"><b>Definition:</b> <a href="unicode_8c_source.html#l00032">unicode.c:32</a></div></div> <div class="ttc" id="unicode_8c_html_a82b09bd7c24e408c73e16db56b8db6cf"><div class="ttname"><a href="unicode_8c.html#a82b09bd7c24e408c73e16db56b8db6cf">UTF8_ACCEPT</a></div><div class="ttdeci">#define UTF8_ACCEPT</div><div class="ttdef"><b>Definition:</b> <a href="unicode_8c_source.html#l00010">unicode.c:10</a></div></div> <div class="ttc" id="struct__cbor__unicode__status_html"><div class="ttname"><a href="struct__cbor__unicode__status.html">_cbor_unicode_status</a></div><div 
class="ttdoc">Signals unicode validation error and possibly its location. </div><div class="ttdef"><b>Definition:</b> <a href="unicode_8h_source.html#l00023">unicode.h:23</a></div></div> <div class="ttc" id="struct__cbor__unicode__status_html_a74b5cdcf18d76a2cdb19c37857f170a7"><div class="ttname"><a href="struct__cbor__unicode__status.html#a74b5cdcf18d76a2cdb19c37857f170a7">_cbor_unicode_status::location</a></div><div class="ttdeci">size_t location</div><div class="ttdef"><b>Definition:</b> <a href="unicode_8h_source.html#l00025">unicode.h:25</a></div></div> <div class="ttc" id="data_8h_html_a121c5944682215e742475f12f07c0a72"><div class="ttname"><a href="data_8h.html#a121c5944682215e742475f12f07c0a72">cbor_data</a></div><div class="ttdeci">const unsigned char * cbor_data</div><div class="ttdef"><b>Definition:</b> <a href="data_8h_source.html#l00020">data.h:20</a></div></div> <div class="ttc" id="unicode_8c_html_ade9ebfe3d9b9bfad4c9fd5c158d519a8"><div class="ttname"><a href="unicode_8c.html#ade9ebfe3d9b9bfad4c9fd5c158d519a8">_cbor_unicode_codepoint_count</a></div><div class="ttdeci">size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length, struct _cbor_unicode_status *status)</div><div class="ttdef"><b>Definition:</b> <a href="unicode_8c_source.html#l00043">unicode.c:43</a></div></div> <div class="ttc" id="unicode_8h_html_a5f58283b505b6cc8a5646516ae20831fa7ddb42c4f16ebd6d46fe1e331c643c8b"><div class="ttname"><a href="unicode_8h.html#a5f58283b505b6cc8a5646516ae20831fa7ddb42c4f16ebd6d46fe1e331c643c8b">_CBOR_UNICODE_BADCP</a></div><div class="ttdef"><b>Definition:</b> <a href="unicode_8h_source.html#l00019">unicode.h:19</a></div></div> <div class="ttc" id="unicode_8h_html_a5f58283b505b6cc8a5646516ae20831faf42cbfe1ac853288773814920e16318c"><div class="ttname"><a href="unicode_8h.html#a5f58283b505b6cc8a5646516ae20831faf42cbfe1ac853288773814920e16318c">_CBOR_UNICODE_OK</a></div><div class="ttdef"><b>Definition:</b> <a 
href="unicode_8h_source.html#l00018">unicode.h:18</a></div></div> </div><!-- fragment --></div><!-- contents --> <!-- start footer part --> <hr class="footer"/><address class="footer"><small> Generated on Mon Feb 6 2017 00:26:19 for libcbor by  <a href="http://www.doxygen.org/index.html"> <img class="footer" src="doxygen.png" alt="doxygen"/> </a> 1.8.12 </small></address> </body> </html>