I have been working on a "JPL-compliant" version of Fexl. The JPL standard forbids the use of recursive functions in C. Currently I use this lambda substitution routine in type_sym.c, which is recursive:
// Use pattern p to make a copy of expression e with argument x substituted in // the places designated by the pattern. static value subst(value p, value e, value x) { if (p == QF) return hold(e); if (p == QT) return hold(x); return V(e->T,subst(p->L,e->L,x),subst(p->R,e->R,x)); }
The trick is to make that routine non-recursive to be in compliance with JPL. Currently I represent patterns as trees, e.g.:
A(A(hold(QF),hold(QT)),A(hold(QT),hold(QF)))
In the non-recursive version of subst, I will represent patterns as a linear series of op codes, where each op code is of type:
typedef void (*opcode)(void);
For the sake of this discussion I represent each op code using the letters "ATF.", but I don't actually use letters in the implementation. So an example pattern might look like:
"AAFT.ATF.."
When the parser builds a lambda pattern, it will allocate a fixed work area to be used for substitution with that pattern at run time. The size of this work area is known in advance based on the op codes in the pattern.
Within the work area I use two stacks, and input stack (si) and an output stack (so). However, I combine the two stacks into a single array (work), with (so) followed by (si).
When the subst routine is called, it follows the op codes in the pattern from left to right, and each op code does a very simple operation in the (work) array. The routine is branchless, with the sole exception of checking loop termination.
To determine the requisite size of the total work area (work), and the embedded stacks (so) and (si) within that work area, I have analyzed the "highwater" behavior of any valid sequence of op codes.
Let nA, nT, nF be the number of 'A', 'T', 'F' op codes in a pattern. Then the minimum sizes of the (so) and (si) stacks are:
size_so = nT + nF size_si = 1 + 2*nA
For example with p = "AAFT.ATF.." we have:
nA = 3 nT = 2 nF = 2So in that case:
size_so = 4 size_si = 7That means I can allocate a single array (work) of size (size_so + size_si) and define logical offsets into that array for the output and input stacks:
so = work + 0; // output stack, size size_so si = work + size_so; // input stack, size size_si
Note by the way that the total size (size_so + size_si) is equal to 1 plus the number of op codes in the pattern.
In practice I don't actually define the (so) and (si) stack pointers. Intead I use two index variables into the main (work) array:
unsigned int no = 0; // offset of output stack unsigned int ni = size_so; // offset of input stack
By incorporating the offset size_so into the index ni, I don't need (so) and (si), and all the operations just do things in the (work) array.
Here is the test harness I created for the new subst routine, in a file called "test_pattern.c".
#include <value.h> #include <basic.h> #include <stddef.h> #include <memory.h> #include <output.h> #include <show.h> #include <string.h> #include <test_pattern.h> #include <type_num.h> // Lambda substitution engine typedef void (*opcode)(void); struct pattern { unsigned int size; // size of the ops and work vectors unsigned int size_so; // size of the output stack within work opcode *ops; value *work; }; static value *work; // current work area static unsigned int no; // offset of output stack static unsigned int ni; // offset of input stack static value curr_x; // current subject value x to be substituted // Push right and left sides of top input. static void op_A(void) { value E = work[ni]; work[ni+1] = E->R; work[ni+2] = E->L; ni += 2; } // Keep the current x. static void op_T(void) { work[no] = hold(curr_x); ni--; no++; } // Keep the top input. static void op_F(void) { work[no] = hold(work[ni]); ni--; no++; } // Combine left and right outputs into new output. static void op_dot(void) { value E = work[ni]; value L = work[no-2]; value R = work[no-1]; work[no-2] = V(E->T,L,R); ni--; no--; } // Use pattern p to make a copy of expression e with argument x substituted in // the places designated by the pattern. static value subst(struct pattern *p, value e, value x) { work = p->work; no = 0; ni = p->size_so; curr_x = x; work[ni] = e; // I use a do loop because I know the first op code cannot be 0. { opcode *ops = p->ops; opcode op = *ops; do { op(); op = *(++ops); } while (op); } return work[0]; } // The following functions are only for testing patterns expressed as strings. // They won't be in the production code. static unsigned int map_chars_to_ops(const char *s_ops, opcode *ops) { unsigned int size_so = 0; while (1) { switch (*(s_ops++)) { case 'A': *(ops++) = op_A; continue; case 'T': *(ops++) = op_T; size_so++; continue; case 'F': *(ops++) = op_F; size_so++; continue; case '.': *(ops++) = op_dot; continue; } *ops = 0; // sentinel return size_so; } } // Assumes the s_ops string is properly formed. static struct pattern *string_to_pattern(const char *s_ops) { size_t size = 1 + strlen(s_ops); value *work = new_memory(size * sizeof(value)); struct pattern *p = new_memory(sizeof(struct pattern)); opcode *ops = new_memory(size * sizeof(opcode)); unsigned int size_so = map_chars_to_ops(s_ops,ops); *p = (struct pattern){ size, size_so, ops, work }; return p; } static void free_pattern(struct pattern *p) { free_memory(p->ops, p->size * sizeof(opcode)); free_memory(p->work, p->size * sizeof(value)); free_memory(p, sizeof(struct pattern)); } static void try_pattern(const char *s_ops, value e, value x) { struct pattern *p = string_to_pattern(s_ops); value r = subst(p,e,x); put("p = ");put(s_ops);nl(); show_line("e = ",e); show_line("x = ",x); show_line("r = ",r); nl(); free_pattern(p); drop(r); drop(e); drop(x); } void test_patterns(void) { try_pattern ( "T", hold(QI), Qnum(3) ); try_pattern ( "F", hold(QI), Qnum(3) ); try_pattern ( "AFT.", A(hold(QI),hold(QI)), Qnum(3) ); try_pattern ( "ATF.", A(hold(QI),hold(QI)), Qnum(3) ); try_pattern ( "AAFT.ATF..", A(A(hold(QI),hold(QI)),V(type_I,hold(QI),hold(QI))), Qnum(3) ); }
The output is:
p = T e = [I] x = [num 3] r = [num 3] p = F e = [I] x = [num 3] r = [I] p = AFT. e = [A [I] [I]] x = [num 3] r = [A [I] [num 3]] p = ATF. e = [A [I] [I]] x = [num 3] r = [A [num 3] [I]] p = AAFT.ATF.. e = [A [A [I] [I]] [I [I] [I]]] x = [num 3] r = [A [A [I] [num 3]] [I [num 3] [I]]]
The use of opcodes generates a very tight loop in the subst routine. Consider the C code that loops through the op codes:
do { op(); op = *(++ops); } while (op);
In the assembler output, that loop becomes:
jmp .L18 .p2align 4,,10 .p2align 3 .L23: addq $8, %rbx .L18: call *%rax movq (%rbx), %rax testq %rax, %rax jne .L23
That was assembled with:
gcc -S -I . -O3 test_pattern.c
At some point I could generate custom ASM code for any given pattern and jump right into it with a single return address in a register. But that has platform dependencies, and I should start with the op code technique anyway.