Show all entries

Tue 2023-03-28

Outline of non-recursive version of subst in type_sym.c

I have been working on a "JPL-compliant" version of Fexl. The JPL standard forbids the use of recursive functions in C. Currently I use this lambda substitution routine in type_sym.c, which is recursive:

// Use pattern p to make a copy of expression e with argument x substituted in
// the places designated by the pattern.

static value subst(value p, value e, value x)
    {
    if (p == QF) return hold(e);
    if (p == QT) return hold(x);
    return V(e->T,subst(p->L,e->L,x),subst(p->R,e->R,x));
    }

The trick is to make that routine non-recursive to be in compliance with JPL. Currently I represent patterns as trees, e.g.:

A(A(hold(QF),hold(QT)),A(hold(QT),hold(QF)))

In the non-recursive version of subst, I will represent patterns as a linear series of op codes, where each op code is of type:

typedef void (*opcode)(void);

For the sake of this discussion I represent each op code using the letters "ATF.", but I don't actually use letters in the implementation. So an example pattern might look like:

"AAFT.ATF.."

When the parser builds a lambda pattern, it will allocate a fixed work area to be used for substitution with that pattern at run time. The size of this work area is known in advance based on the op codes in the pattern.

Within the work area I use two stacks, and input stack (si) and an output stack (so). However, I combine the two stacks into a single array (work), with (so) followed by (si).

When the subst routine is called, it follows the op codes in the pattern from left to right, and each op code does a very simple operation in the (work) array. The routine is branchless, with the sole exception of checking loop termination.

To determine the requisite size of the total work area (work), and the embedded stacks (so) and (si) within that work area, I have analyzed the "highwater" behavior of any valid sequence of op codes.

Let nA, nT, nF be the number of 'A', 'T', 'F' op codes in a pattern. Then the minimum sizes of the (so) and (si) stacks are:

size_so = nT + nF
size_si = 1 + 2*nA

For example with p = "AAFT.ATF.." we have:

nA = 3
nT = 2
nF = 2
So in that case:
size_so = 4
size_si = 7
That means I can allocate a single array (work) of size (size_so + size_si) and define logical offsets into that array for the output and input stacks:
so = work + 0;        // output stack, size size_so
si = work + size_so;  // input stack,  size size_si

Note by the way that the total size (size_so + size_si) is equal to 1 plus the number of op codes in the pattern.

In practice I don't actually define the (so) and (si) stack pointers. Intead I use two index variables into the main (work) array:

unsigned int no = 0;       // offset of output stack
unsigned int ni = size_so; // offset of input stack

By incorporating the offset size_so into the index ni, I don't need (so) and (si), and all the operations just do things in the (work) array.

Operation

Here is the test harness I created for the new subst routine, in a file called "test_pattern.c".

#include <value.h>

#include <basic.h>
#include <stddef.h>
#include <memory.h>
#include <output.h>
#include <show.h>
#include <string.h>
#include <test_pattern.h>
#include <type_num.h>

// Lambda substitution engine

typedef void (*opcode)(void);

struct pattern
    {
    unsigned int size;    // size of the ops and work vectors
    unsigned int size_so; // size of the output stack within work
    opcode *ops;
    value *work;
    };

static value *work;     // current work area
static unsigned int no; // offset of output stack
static unsigned int ni; // offset of input stack
static value curr_x;    // current subject value x to be substituted

// Push right and left sides of top input.
static void op_A(void)
    {
    value E = work[ni];
    work[ni+1] = E->R;
    work[ni+2] = E->L;
    ni += 2;
    }

// Keep the current x.
static void op_T(void)
    {
    work[no] = hold(curr_x);
    ni--;
    no++;
    }

// Keep the top input.
static void op_F(void)
    {
    work[no] = hold(work[ni]);
    ni--;
    no++;
    }

// Combine left and right outputs into new output.
static void op_dot(void)
    {
    value E = work[ni];
    value L = work[no-2];
    value R = work[no-1];
    work[no-2] = V(E->T,L,R);
    ni--;
    no--;
    }

// Use pattern p to make a copy of expression e with argument x substituted in
// the places designated by the pattern.

static value subst(struct pattern *p, value e, value x)
    {
    work = p->work;
    no = 0;
    ni = p->size_so;
    curr_x = x;

    work[ni] = e;

    // I use a do loop because I know the first op code cannot be 0.
    {
    opcode *ops = p->ops;
    opcode op = *ops;

    do
        {
        op();
        op = *(++ops);
        }
        while (op);
    }

    return work[0];
    }

// The following functions are only for testing patterns expressed as strings.
// They won't be in the production code.

static unsigned int map_chars_to_ops(const char *s_ops, opcode *ops)
    {
    unsigned int size_so = 0;
    while (1)
        {
        switch (*(s_ops++))
            {
            case 'A':
                *(ops++) = op_A;
                continue;
            case 'T':
                *(ops++) = op_T;
                size_so++;
                continue;
            case 'F':
                *(ops++) = op_F;
                size_so++;
                continue;
            case '.':
                *(ops++) = op_dot;
                continue;
            }
        *ops = 0; // sentinel
        return size_so;
        }
    }

// Assumes the s_ops string is properly formed.
static struct pattern *string_to_pattern(const char *s_ops)
    {
    size_t size = 1 + strlen(s_ops);
    value *work = new_memory(size * sizeof(value));
    struct pattern *p = new_memory(sizeof(struct pattern));
    opcode *ops = new_memory(size * sizeof(opcode));
    unsigned int size_so = map_chars_to_ops(s_ops,ops);

    *p = (struct pattern){ size, size_so, ops, work };
    return p;
    }

static void free_pattern(struct pattern *p)
    {
    free_memory(p->ops, p->size * sizeof(opcode));
    free_memory(p->work, p->size * sizeof(value));
    free_memory(p, sizeof(struct pattern));
    }

static void try_pattern(const char *s_ops, value e, value x)
    {
    struct pattern *p = string_to_pattern(s_ops);
    value r = subst(p,e,x);

    put("p = ");put(s_ops);nl();
    show_line("e = ",e);
    show_line("x = ",x);
    show_line("r = ",r);
    nl();

    free_pattern(p);
    drop(r);
    drop(e);
    drop(x);
    }

void test_patterns(void)
    {
    try_pattern
        (
        "T",
        hold(QI),
        Qnum(3)
        );
    try_pattern
        (
        "F",
        hold(QI),
        Qnum(3)
        );
    try_pattern
        (
        "AFT.",
        A(hold(QI),hold(QI)),
        Qnum(3)
        );
    try_pattern
        (
        "ATF.",
        A(hold(QI),hold(QI)),
        Qnum(3)
        );
    try_pattern
        (
        "AAFT.ATF..",
        A(A(hold(QI),hold(QI)),V(type_I,hold(QI),hold(QI))),
        Qnum(3)
        );
    }

The output is:

p = T
e = [I]
x = [num 3]
r = [num 3]

p = F
e = [I]
x = [num 3]
r = [I]

p = AFT.
e = [A [I] [I]]
x = [num 3]
r = [A [I] [num 3]]

p = ATF.
e = [A [I] [I]]
x = [num 3]
r = [A [num 3] [I]]

p = AAFT.ATF..
e = [A [A [I] [I]] [I [I] [I]]]
x = [num 3]
r = [A [A [I] [num 3]] [I [num 3] [I]]]

Efficiency

The use of opcodes generates a very tight loop in the subst routine. Consider the C code that loops through the op codes:

    do
        {
        op();
        op = *(++ops);
        }
        while (op);

In the assembler output, that loop becomes:

    jmp .L18
    .p2align 4,,10
    .p2align 3
.L23:
    addq    $8, %rbx
.L18:
    call    *%rax
    movq    (%rbx), %rax
    testq   %rax, %rax
    jne .L23

That was assembled with:

gcc -S -I . -O3 test_pattern.c

Idea: Generating custom ASM code on the fly

At some point I could generate custom ASM code for any given pattern and jump right into it with a single return address in a register. But that has platform dependencies, and I should start with the op code technique anyway.