/* $NetBSD: C.c,v 1.19 2009/07/13 19:05:40 roy Exp $ */
/*
* Copyright (c) 1987, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#if defined(__RCSID) && !defined(lint)
#if 0
static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
#else
__RCSID("$NetBSD: C.c,v 1.19 2009/07/13 19:05:40 roy Exp $");
#endif
#endif /* not lint */
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include "ctags.h"
static int func_entry(void);
static void hash_entry(void);
static void skip_string(int);
static int str_entry(int);
/*
* c_entries --
* read .c and .h files and call appropriate routines
*/
void
c_entries(void)
{
int c; /* current character */
int level; /* brace level */
int token; /* if reading a token */
int t_def; /* if reading a typedef */
int t_level; /* typedef's brace level */
char *sp; /* buffer pointer */
char tok[MAXTOKEN]; /* token buffer */
lineftell = ftell(inf);
sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
while (GETC(!=, EOF)) {
switch (c) {
/*
* Here's where it DOESN'T handle: {
* foo(a)
* {
* #ifdef notdef
* }
* #endif
* if (a)
* puts("hello, world");
* }
*/
case '{':
++level;
goto endtok;
case '}':
/*
* if level goes below zero, try and fix
* it, even though we've already messed up
*/
if (--level < 0)
level = 0;
goto endtok;
case '\n':
SETLINE;
/*
* the above 3 cases are similar in that they
* are special characters that also end tokens.
*/
endtok: if (sp > tok) {
*sp = EOS;
token = YES;
sp = tok;
}
else
token = NO;
continue;
/*
* We ignore quoted strings and character constants
* completely.
*/
case '"':
case '\'':
(void)skip_string(c);
break;
/*
* comments can be fun; note the state is unchanged after
* return, in case we found:
* "foo() XX comment XX { int bar; }"
*/
case '/':
if (GETC(==, '*')) {
skip_comment(c);
continue;
} else if (c == '/') {
skip_comment(c);
continue;
}
(void)ungetc(c, inf);
c = '/';
goto storec;
/* hash marks flag #define's. */
case '#':
if (sp == tok) {
hash_entry();
break;
}
goto storec;
/*
* if we have a current token, parenthesis on
* level zero indicates a function.
*/
case '(':
do c = getc(inf);
while (c != EOF && iswhite(c));
if (c == '*')
break;
if (c != EOF)
ungetc(c, inf);
if (!level && token) {
int curline;
if (sp != tok)
*sp = EOS;
/*
* grab the line immediately, we may
* already be wrong, for example,
* foo\n
* (arg1,
*/
get_line();
curline = lineno;
if (func_entry()) {
++level;
pfnote(tok, curline);
}
break;
}
goto storec;
/*
* semi-colons indicate the end of a typedef; if we find a
* typedef we search for the next semi-colon of the same
* level as the typedef. Ignoring "structs", they are
* tricky, since you can find:
*
* "typedef long time_t;"
* "typedef unsigned int u_int;"
* "typedef unsigned int u_int [10];"
*
* If looking at a typedef, we save a copy of the last token
* found. Then, when we find the ';' we take the current
* token if it starts with a valid token name, else we take
* the one we saved. There's probably some reasonable
* alternative to this...
*/
case ';':
if (t_def && level == t_level) {
t_def = NO;
get_line();
if (sp != tok)
*sp = EOS;
pfnote(tok, lineno);
break;
}
goto storec;
/*
* store characters until one that can't be part of a token
* comes along; check the current token against certain
* reserved words.
*/
default:
storec: if (c == EOF)
break;
if (!intoken(c)) {
if (sp == tok)
break;
*sp = EOS;
if (tflag) {
/* no typedefs inside typedefs */
if (!t_def &&
!memcmp(tok, "typedef",8)) {
t_def = YES;
t_level = level;
break;
}
/* catch "typedef struct" */
if ((!t_def || t_level <= level)
&& (!memcmp(tok, "struct", 7)
|| !memcmp(tok, "union", 6)
|| !memcmp(tok, "enum", 5))) {
/*
* get line immediately;
* may change before '{'
*/
get_line();
if (str_entry(c))
++level;
break;
/* } */
}
}
sp = tok;
}
else if (sp != tok || begtoken(c)) {
if (sp < tok + sizeof tok)
*sp++ = c;
token = YES;
}
continue;
}
sp = tok;
token = NO;
}
}
/*
* func_entry --
* handle a function reference
*/
static int
func_entry(void)
{
int c; /* current character */
int level = 0; /* for matching '()' */
static char attribute[] = "__attribute__";
char maybe_attribute[sizeof attribute + 1],
*anext;
/*
* Find the end of the assumed function declaration.
* Note that ANSI C functions can have type definitions so keep
* track of the parentheses nesting level.
*/
while (GETC(!=, EOF)) {
switch (c) {
case '\'':
case '"':
/* skip strings and character constants */
skip_string(c);
break;
case '/':
/* skip comments */
if (GETC(==, '*'))
skip_comment(c);
else if (c == '/')
skip_comment(c);
break;
case '(':
level++;
break;
case ')':
if (level == 0)
goto fnd;
level--;
break;
case '\n':
SETLINE;
}
}
return (NO);
fnd:
/*
* we assume that the character after a function's right paren
* is a token character if it's a function and a non-token
* character if it's a declaration. Comments don't count...
*/
for (anext = maybe_attribute;;) {
while (GETC(!=, EOF) && iswhite(c))
if (c == '\n')
SETLINE;
if (c == EOF)
return NO;
/*
* Recognize the gnu __attribute__ extension, which would
* otherwise make the heuristic test DTWT
*/
if (anext == maybe_attribute) {
if (intoken(c)) {
*anext++ = c;
continue;
}
} else {
if (intoken(c)) {
if (anext - maybe_attribute
< (ptrdiff_t)(sizeof attribute - 1))
*anext++ = c;
else break;
continue;
} else {
*anext++ = '\0';
if (strcmp(maybe_attribute, attribute) == 0) {
(void)ungetc(c, inf);
return NO;
}
break;
}
}
if (intoken(c) || c == '{')
break;
if (c == '/' && GETC(==, '*'))
skip_comment(c);
else if (c == '/')
skip_comment(c);
else { /* don't ever "read" '/' */
(void)ungetc(c, inf);
return (NO);
}
}
if (c != '{')
(void)skip_key('{');
return (YES);
}
/*
* hash_entry --
* handle a line starting with a '#'
*/
static void
hash_entry(void)
{
int c; /* character read */
int curline; /* line started on */
char *sp; /* buffer pointer */
char tok[MAXTOKEN]; /* storage buffer */
curline = lineno;
do if (GETC(==, EOF))
return;
while(c != '\n' && iswhite(c));
ungetc(c, inf);
for (sp = tok;;) { /* get next token */
if (GETC(==, EOF))
return;
if (iswhite(c))
break;
if (sp < tok + sizeof tok)
*sp++ = c;
}
if(sp >= tok + sizeof tok)
--sp;
*sp = EOS;
if (memcmp(tok, "define", 6)) /* only interested in #define's */
goto skip;
for (;;) { /* this doesn't handle "#define \n" */
if (GETC(==, EOF))
return;
if (!iswhite(c))
break;
}
for (sp = tok;;) { /* get next token */
if(sp < tok + sizeof tok)
*sp++ = c;
if (GETC(==, EOF))
return;
/*
* this is where it DOESN'T handle
* "#define \n"
*/
if (!intoken(c))
break;
}
if(sp >= tok + sizeof tok)
--sp;
*sp = EOS;
if (dflag || c == '(') { /* only want macros */
get_line();
pfnote(tok, curline);
}
skip: if (c == '\n') { /* get rid of rest of define */
SETLINE
if (*(sp - 1) != '\\')
return;
}
(void)skip_key('\n');
}
/*
* str_entry --
* handle a struct, union or enum entry
*/
static int
str_entry(int c /* current character */)
{
int curline; /* line started on */
char *sp; /* buffer pointer */
char tok[LINE_MAX]; /* storage buffer */
curline = lineno;
while (iswhite(c))
if (GETC(==, EOF))
return (NO);
if (c == '{') /* it was "struct {" */
return (YES);
for (sp = tok;;) { /* get next token */
*sp++ = c;
if (GETC(==, EOF))
return (NO);
if (!intoken(c))
break;
}
switch (c) {
case '{': /* it was "struct foo{" */
--sp;
break;
case '\n': /* it was "struct foo\n" */
SETLINE;
/*FALLTHROUGH*/
default: /* probably "struct foo " */
while (GETC(!=, EOF))
if (!iswhite(c))
break;
if (c != '{') {
(void)ungetc(c, inf);
return (NO);
}
}
*sp = EOS;
pfnote(tok, curline);
return (YES);
}
/*
* skip_comment --
* skip over comment
*/
void
skip_comment(int commenttype)
{
int c; /* character read */
int star; /* '*' flag */
for (star = 0; GETC(!=, EOF);)
switch(c) {
/* comments don't nest, nor can they be escaped. */
case '*':
star = YES;
break;
case '/':
if (commenttype == '*' && star)
return;
break;
case '\n':
if (commenttype == '/') {
/*
* we don't really parse C, so sometimes it
* is necessary to see the newline
*/
ungetc(c, inf);
return;
}
SETLINE;
/*FALLTHROUGH*/
default:
star = NO;
break;
}
}
/*
* skip_string --
* skip to the end of a string or character constant.
*/
void
skip_string(int key)
{
int c,
skip;
for (skip = NO; GETC(!=, EOF); )
switch (c) {
case '\\': /* a backslash escapes anything */
skip = !skip; /* we toggle in case it's "\\" */
break;
case '\n':
SETLINE;
/*FALLTHROUGH*/
default:
if (c == key && !skip)
return;
skip = NO;
}
}
/*
* skip_key --
* skip to next char "key"
*/
int
skip_key(int key)
{
int c,
skip,
retval;
for (skip = retval = NO; GETC(!=, EOF);)
switch(c) {
case '\\': /* a backslash escapes anything */
skip = !skip; /* we toggle in case it's "\\" */
break;
case ';': /* special case for yacc; if one */
case '|': /* of these chars occurs, we may */
retval = YES; /* have moved out of the rule */
break; /* not used by C */
case '\'':
case '"':
/* skip strings and character constants */
skip_string(c);
break;
case '/':
/* skip comments */
if (GETC(==, '*')) {
skip_comment(c);
break;
} else if (c == '/') {
skip_comment(c);
break;
}
(void)ungetc(c, inf);
c = '/';
goto norm;
case '\n':
SETLINE;
/*FALLTHROUGH*/
default:
norm:
if (c == key && !skip)
return (retval);
skip = NO;
}
return (retval);
}