Training courses

Kernel and Embedded Linux

Bootlin training courses

Embedded Linux, kernel,
Yocto Project, Buildroot, real-time,
graphics, boot time, debugging...

Bootlin logo

Elixir Cross Referencer

/*
    Generators - components that generate strings for a given regex pattern.

    For the moment undocumented, and is subject to change.
*/
module std.regex.internal.generator;

/*
    Useful utility for self-testing, an infinite range of string samples
    that _have_ to match given compiled regex.
    Caveats: supports only a simple subset of bytecode.
*/
@trusted private struct SampleGenerator(Char)
{
    import std.array : appender, Appender;
    import std.format : formattedWrite;
    import std.random : Xorshift;
    import std.regex.internal.ir : Regex, IR, IRL;
    import std.utf : isValidDchar, byChar;
    Regex!Char re;
    Appender!(char[]) app;
    uint limit, seed;
    Xorshift gen;
    //generator for pattern r, with soft maximum of threshold elements
    //and a given random seed
    this(ref Regex!Char r, uint threshold, uint randomSeed)
    {
        re = r;
        limit = threshold;
        seed = randomSeed;
        app = appender!(Char[])();
        compose();
    }

    uint rand(uint x)
    {
        uint r = gen.front % x;
        gen.popFront();
        return r;
    }

    void compose()
    {
        uint pc = 0, counter = 0, dataLenOld = uint.max;
        for (;;)
        {
            switch (re.ir[pc].code)
            {
            case IR.Char:
                    formattedWrite(app,"%s", cast(dchar) re.ir[pc].data);
                    pc += IRL!(IR.Char);
                    break;
                case IR.OrChar:
                    uint len = re.ir[pc].sequence;
                    formattedWrite(app, "%s", cast(dchar) re.ir[pc + rand(len)].data);
                    pc += len;
                    break;
                case IR.CodepointSet:
                case IR.Trie:
                    auto set = re.charsets[re.ir[pc].data];
                    auto x = rand(cast(uint) set.byInterval.length);
                    auto y = rand(set.byInterval[x].b - set.byInterval[x].a);
                    formattedWrite(app, "%s", cast(dchar)(set.byInterval[x].a+y));
                    pc += IRL!(IR.CodepointSet);
                    break;
                case IR.Any:
                    uint x;
                    do
                    {
                        x = rand(0x11_000);
                    }while (x == '\r' || x == '\n' || !isValidDchar(x));
                    formattedWrite(app, "%s", cast(dchar) x);
                    pc += IRL!(IR.Any);
                    break;
                case IR.GotoEndOr:
                    pc += IRL!(IR.GotoEndOr)+re.ir[pc].data;
                    assert(re.ir[pc].code == IR.OrEnd);
                    goto case;
                case IR.OrEnd:
                    pc += IRL!(IR.OrEnd);
                    break;
                case IR.OrStart:
                    pc += IRL!(IR.OrStart);
                    goto case;
                case IR.Option:
                    uint next = pc + re.ir[pc].data + IRL!(IR.Option);
                    uint nOpt = 0;
                    //queue next Option
                    while (re.ir[next].code == IR.Option)
                    {
                        nOpt++;
                        next += re.ir[next].data + IRL!(IR.Option);
                    }
                    nOpt++;
                    nOpt = rand(nOpt);
                    for (;nOpt; nOpt--)
                    {
                        pc += re.ir[pc].data + IRL!(IR.Option);
                    }
                    assert(re.ir[pc].code == IR.Option);
                    pc += IRL!(IR.Option);
                    break;
                case IR.RepeatStart:case IR.RepeatQStart:
                    pc += IRL!(IR.RepeatStart)+re.ir[pc].data;
                    goto case IR.RepeatEnd;
                case IR.RepeatEnd:
                case IR.RepeatQEnd:
                    uint len = re.ir[pc].data;
                    uint step = re.ir[pc+2].raw;
                    uint min = re.ir[pc+3].raw;
                    if (counter < min)
                    {
                        counter += step;
                        pc -= len;
                        break;
                    }
                    uint max = re.ir[pc+4].raw;
                    if (counter < max)
                    {
                        if (app.data.length < limit && rand(3) > 0)
                        {
                            pc -= len;
                            counter += step;
                        }
                        else
                        {
                            counter = counter%step;
                            pc += IRL!(IR.RepeatEnd);
                        }
                    }
                    else
                    {
                        counter = counter%step;
                        pc += IRL!(IR.RepeatEnd);
                    }
                    break;
                case IR.InfiniteStart, IR.InfiniteBloomStart, IR.InfiniteQStart:
                    pc += re.ir[pc].data + IRL!(IR.InfiniteStart);
                    goto case IR.InfiniteEnd; //both Q and non-Q
                case IR.InfiniteEnd, IR.InfiniteBloomEnd, IR.InfiniteQEnd:
                    uint len = re.ir[pc].data;
                    if (app.data.length == dataLenOld)
                    {
                        pc += IRL!(IR.InfiniteEnd);
                        break;
                    }
                    dataLenOld = cast(uint) app.data.length;
                    if (app.data.length < limit && rand(3) > 0)
                        pc = pc - len;
                    else
                        pc = pc + re.ir[pc].length;
                    break;
                case IR.GroupStart, IR.GroupEnd:
                    pc += IRL!(IR.GroupStart);
                    break;
                case IR.Bol, IR.Wordboundary, IR.Notwordboundary:
                case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart:
                default:
                    return;
            }
        }
    }

    @property Char[] front()
    {
        return app.data;
    }

    enum empty = false;

    void popFront()
    {
        app.shrinkTo(0);
        compose();
    }
}

@system unittest
{
    import std.range, std.regex;
    auto re = regex(`P[a-z]{3,}q`);
    auto gen = SampleGenerator!char(re, 20, 3141592);
    static assert(isInputRange!(typeof(gen)));
    //@@@BUG@@@ somehow gen.take(1_000) doesn't work
    foreach (v; take(gen, 1_000))
        assert(v.match(re));
}