From e79ba0056beadc169fb2679d1d0874379d9e0e2f Mon Sep 17 00:00:00 2001
From: Lephenixnoir <sebastien.michelland@protonmail.com>
Date: Wed, 28 Aug 2024 10:43:23 +0200
Subject: [PATCH] juic: add basic unit tests to consolidate

This way when I get the record constructors right (which I think I have
in my mind now) I can keep around non-regression tests.
---
 doc/jui-lang.txt             | 13 +----
 juic/builtins.py             |  7 ++-
 juic/datatypes.py            | 94 ++++++++++++++++++++++++++++++++++--
 juic/eval.py                 | 78 +++++++++++-------------------
 juic/examples/unit_tests.jui | 29 +++++++++++
 juic/main.py                 | 16 ++++--
 juic/parser.py               | 48 +++++++++++++++---
 7 files changed, 207 insertions(+), 78 deletions(-)
 create mode 100644 juic/examples/unit_tests.jui

diff --git a/doc/jui-lang.txt b/doc/jui-lang.txt
index 01b8399..7e70c0e 100644
--- a/doc/jui-lang.txt
+++ b/doc/jui-lang.txt
@@ -156,6 +156,8 @@ BIG TODO:
 - Don't have a scope and ability to let- or fun-declare inside function body!!
 - `rec x() = y {}; x {}` is it of record type `x` or `y`? How to say that `x`
   is based on `y`?
+  -> it's of type y, which is derived from x and may or may not have custom
+     codegen that overrides that of x
 
 IMPLEMENTATION TODO:
 - List syntax
@@ -177,17 +179,6 @@ The syntax for assigning at labels is a bit dodgy at the moment. It should be
   -> function-based interface param("FX")
 - "Control-flow": for(each) loops, what else needed for basic ops?
 
-Limitation of lazy record construction: if you use a function in the middle of
-a record construction, the record update `<{ }` will force the evaluation of
-fields even though it would be possible not to force that.
-
-```
-fun f(e) = e <{ width: e.width + 10 };
-rec r1(x) = f <| base { width: x };
-rec r2(x) = r1 { height: this.width; x };
-r2 { 10; }
-```
-
 Capturing "this" in a local variable to get a snapshot is required, otherwise
 we can't refer to parents.
 
diff --git a/juic/builtins.py b/juic/builtins.py
index fbb9d84..23b45fd 100644
--- a/juic/builtins.py
+++ b/juic/builtins.py
@@ -9,12 +9,15 @@ def juiLen(x):
         raise JuiTypeError("len")
     return len(x)
 
+R_record = RecordType("record", None)
+R_subrecord = RecordType("subrecord", R_record)
+
 builtinClosure = Closure(parent=None, scope=MutableScopeData(defs={
     "print": (0, BuiltinFunction(juiPrint)),
     "len": (0, BuiltinFunction(juiLen)),
 
     # TODO: Remove the built-in record type "record" used for testing
-    "record": (0, RecordType.makePlainRecordType()),
-    "subrecord": (0, RecordType.makePlainRecordType()),
+    "record": (0, R_record),
+    "subrecord": (0, R_subrecord),
 
 }, timestamp=1))
diff --git a/juic/datatypes.py b/juic/datatypes.py
index 270da85..654cc75 100644
--- a/juic/datatypes.py
+++ b/juic/datatypes.py
@@ -35,11 +35,12 @@ class Function:
 
 @dataclass
 class RecordType:
-    # TODO: Record prototypes?
+    # Record name
+    name: str
+    # Base type for inheritance/subconstructor logic
+    base: Union[None, "RecordType"]
 
-    @staticmethod
-    def makePlainRecordType():
-        return RecordType()
+    # TODO: Record prototypes?
 
 # @dataclass
 # class RecordCtor:
@@ -99,3 +100,88 @@ def juiIsConstructible(value):
 
 def juiIsRecordUpdatable(value):
     return type(value) == Record
+
+# Generic functions on values
+
+# Render a Jui value as a human-readable string (printing, diagnostics)
+def juiValueString(v):
+    match v:
+        case None:
+            return "null"
+        case bool():  # must come before int(): bool is a subclass of int
+            return str(v).lower()
+        case int() | float():
+            return str(v)
+        case str():
+            return repr(v)
+        case CXXQualid():
+            return "&" + v.qualid
+        case list():
+            return "[" + ", ".join(juiValueString(x) for x in v) + "]"
+        case BuiltinFunction():
+            return str(v)
+        case Function():
+            p = v.params + (["..." + v.variadic] if v.variadic else [])
+            s = "fun(" + ", ".join(p) + ") => " + str(v.body)
+            s += " (in some closure)"
+            return s
+        case RecordType() as rt:
+            return str(rt)
+        case Record() as r:
+            # Attributes first, then children, joined as a single list so
+            # that the last attribute and the first child get a "; " too
+            fields = [k + ": " + juiValueString(y) for k, y in r.attr.items()]
+            fields += [juiValueString(c) for c in r.children]
+            return r.base.name + " {" + "; ".join(fields) + "}"
+        case Thunk() as th:
+            return str(th)
+        case PartialRecordSnapshot() as prs:
+            return str(prs)
+        case _:
+            raise NotImplementedError
+
+# Structural equality test between two *forced* values
+def juiValuesEqual(v1, v2):
+    match v1, v2:
+        case None, None:
+            return True
+        # Same-kind scalars defer to Python's own equality
+        case (bool(), bool()) | (int(), int()) | (float(), float()):
+            return v1 == v2
+        case str(), str():
+            return v1 == v2
+        case CXXQualid() as q1, CXXQualid() as q2:
+            return q1.qualid == q2.qualid
+        case list() as xs, list() as ys:
+            # Same length, then pairwise-equal elements
+            if len(xs) != len(ys):
+                return False
+            return all(juiValuesEqual(a, b) for a, b in zip(xs, ys))
+        case BuiltinFunction(), BuiltinFunction():
+            # Builtins are equal only when they are the same object
+            return v1 is v2
+        case Function(), Function():
+            raise Exception("cannot compare functions")
+        case RecordType(), RecordType():
+            # Record types are compared by identity, not by name
+            return v1 is v2
+        case Record() as r1, Record() as r2:
+            # Checked in order: base type, attribute names, attribute
+            # values, number of children, children pairwise
+            return (
+                juiValuesEqual(r1.base, r2.base)
+                and r1.attr.keys() == r2.attr.keys()
+                and all(juiValuesEqual(r1.attr[k], r2.attr[k]) for k in r1.attr)
+                and len(r1.children) == len(r2.children)
+                and all(juiValuesEqual(a, b)
+                        for a, b in zip(r1.children, r2.children))
+            )
+        case Thunk() as th1, Thunk() as th2:
+            # Both thunks must already hold a valid result
+            for th in (th1, th2):
+                assert th.evaluated and not th.invalid
+            return juiValuesEqual(th1.result, th2.result)
+        case PartialRecordSnapshot(), PartialRecordSnapshot():
+            raise Exception("cannot compare PRSs")
+        case _, _:
+            raise Exception(f"invalid types for comparison: {type(v1)}, {type(v2)}")
diff --git a/juic/eval.py b/juic/eval.py
index 4334a5d..cddb46d 100644
--- a/juic/eval.py
+++ b/juic/eval.py
@@ -104,6 +104,21 @@ class Thunk:
     # Whether the thunk is currently under evaluation
     _running: bool = False
 
+    def __str__(self):
+        # Debug rendering: evaluation state first, then the thunk's AST
+        s = "Thunk("
+        if self._running:
+            s += "running, "
+        if self.evaluated:
+            s += "evaluated"
+            if self.invalid:
+                s += ", invalid"
+            else:
+                s += " = " + juiValueString(self.result)
+        else:
+            s += "unevaluated"
+        return s + "){ " + str(self.ast) + " }"
+
 # Object representing the interpretation of a partially-built record. In a
 # record construction or update operation, fields of `this` refer to the latest
 # definition before the point of use, if there is one, or the definition in the
@@ -121,58 +136,12 @@ class PartialRecordSnapshot:
     # Base object for fields not captured in `fieldThunks`
     base: None | Thunk = None
 
+    def __str__(self):
+        return "<A PRS AAAAAAH>" # TODO: placeholder; render fieldThunks and base
+
     def copy(self):
         return PartialRecordSnapshot(self.fieldThunks.copy(), self.base)
 
-# TODO: Make this a general value printing function
-def juiValueString(v):
-    match v:
-        case None:
-            return "null"
-        case bool():
-            return str(v).lower()
-        case int() | float():
-            return str(v)
-        case str():
-            return repr(v)
-        case CXXQualid():
-            return "&" + v.qualid
-        case list():
-            return "[" + ", ".join(juiValueString(x) for x in v) + "]"
-        case BuiltinFunction():
-            return str(v)
-        case Function():
-            p = v.params + (["..." + v.variadic] if v.variadic else [])
-            s = "fun(" + ", ".join(p) + ") => " + str(v.body)
-            s += " (in some closure)"
-            return s
-        case RecordType():
-            return "<RecordType>"
-        case Record() as r:
-            s = juiValueString(r.base) + " " + "{"
-            s += "; ".join(x + ": " + juiValueString(y) for x, y in r.attr.items())
-            s += "; ".join(juiValueString(x) for x in r.children)
-            return s + "}"
-            raise NotImplementedError
-        case Thunk() as th:
-            s = "Thunk("
-            if th._running:
-                s += "running, "
-            if th.evaluated:
-                s += "evaluated"
-                if th.invalid:
-                    s += ", invalid"
-                else:
-                    s += " = " + juiValueString(th.result)
-            else:
-                s += "unevaluated"
-            s += "){ " + str(th.ast) + " }"
-            return s
-        case PartialRecordSnapshot():
-            return "<A PRS AAAAAAH>"
-        case _:
-            raise NotImplementedError
-
 class JuiTypeError(Exception):
     pass
 
@@ -421,6 +390,17 @@ class Context:
             case Node.T.SET_STMT, _:
                 raise NotImplementedError
 
+            case Node.T.UNIT_TEST, [subject, expected]:
+                vs = self.evalExpr(subject)
+                ve = self.evalExpr(expected)
+                vs = self.force(vs)
+                ve = self.force(ve)
+                if not juiValuesEqual(vs, ve):
+                    print("unit test failed:")
+                    print("  " + str(vs))
+                    print("vs.")
+                    print("  " + str(ve))
+
             case _, _:
                 return self.evalExpr(node)
 
diff --git a/juic/examples/unit_tests.jui b/juic/examples/unit_tests.jui
new file mode 100644
index 0000000..90c347c
--- /dev/null
+++ b/juic/examples/unit_tests.jui
@@ -0,0 +1,29 @@
+null;
+//^ null;
+
+1+2;
+//^ 3;
+
+let zero = 0;
+zero + 2 * 20 - 8 / 4;
+//^ 38;
+
+"hello" + "," + "world";
+//^ "hello,world";
+
+if(2 < 4) 8;
+//^ 8;
+
+if(2 > 4) 8;
+//^ null;
+
+if(2 > 4) 8 else 14;
+//^ 14;
+
+let z = 4;
+(z + 10) * 3;
+//^ 42;
+
+let helloworld = "Hello, World!";
+len("xyz" + helloworld) + 1;
+//^ 17;
diff --git a/juic/main.py b/juic/main.py
index 5f51258..a7245b3 100644
--- a/juic/main.py
+++ b/juic/main.py
@@ -11,8 +11,9 @@ usage: juic [OPTIONS] INPUT
 JustUI high-level description compiler.
 
 Options:
-  --debug=lexer      Dump the lexer output and stop.
-         =parser     Dump the parser output and stop.
+  --debug=lexer      Dump the lexer output and stop
+         =parser     Dump the parser output and stop
+  --unit-tests       Check unit tests specified by "//^" comments
 """.strip()
 
 def usage(exitcode=None):
@@ -24,7 +25,8 @@ def usage(exitcode=None):
 def main(argv):
     # Read command-line arguments
     try:
-        opts, args = getopt.gnu_getopt(argv[1:], "", ["help", "debug="])
+        opts, args = getopt.gnu_getopt(argv[1:], "",
+            ["help", "debug=", "unit-tests"])
         opts = dict(opts)
 
         if len(argv) == 1 or "-h" in opts or "--help" in opts:
@@ -45,7 +47,8 @@ def main(argv):
         source = fp.read()
 
     try:
-        lexer = juic.parser.JuiLexer(source, args[0])
+        lexer = juic.parser.JuiLexer(source, args[0],
+            keepUnitTests="--unit-tests" in opts)
         if opts.get("--debug") == "lexer":
             lexer.dump()
             return 0
@@ -65,6 +68,11 @@ def main(argv):
 
     ctx = juic.eval.Context(juic.builtins.builtinClosure)
 
+    if "--unit-tests" in opts:
+        for e in ast.args:
+            ctx.execStmt(e)
+        return 0
+
     for e in ast.args:
         e.dump()
         v = ctx.execStmt(e)
diff --git a/juic/parser.py b/juic/parser.py
index 14419b1..eaa203b 100644
--- a/juic/parser.py
+++ b/juic/parser.py
@@ -76,12 +76,18 @@ class NaiveRegexLexer:
         if not len(self.input):
             return None
 
+        highestPriority = 0
         longestMatch = None
         longestMatchIndex = -1
 
-        for i, (regex, _, _) in enumerate(self.TOKEN_REGEX):
+        for i, (regex, _, _, *rest) in enumerate(self.TOKEN_REGEX):
+            priority = rest[0] if len(rest) else 0
+
             if (m := re.match(regex, self.input)):
-                if longestMatch is None or len(m[0]) > len(longestMatch[0]):
+                score = (priority, len(m[0]))
+                if longestMatch is None or \
+                    score > (highestPriority, len(longestMatch[0])):
+                    highestPriority = priority
                     longestMatch = m
                     longestMatchIndex = i
 
@@ -90,7 +96,7 @@ class NaiveRegexLexer:
             self.raiseError(f"unknown syntax '{nextWord}'")
 
         # Build the token
-        _, type_info, value_info = self.TOKEN_REGEX[longestMatchIndex]
+        _, type_info, value_info, *rest = self.TOKEN_REGEX[longestMatchIndex]
         m = longestMatch
 
         typ = type_info(m) if callable(type_info) else type_info
@@ -212,15 +218,17 @@ def unescape(s: str) -> str:
 
 class JuiLexer(NaiveRegexLexer):
     T = enum.Enum("T",
-        ["WS", "KW", "COMMENT",
-         "INT", "FLOAT", "STRING",
+        ["WS", "KW", "COMMENT", "UNIT_TEST_MARKER",
+         "TEXTLIT", "INT", "FLOAT", "STRING",
          "IDENT", "ATTR", "VAR", "LABEL", "FIELD", "CXXIDENT"])
 
+    RE_UTMARKER = r'//\^'
+
     RE_COMMENT  = r'(#|//)[^\n]*|/\*([^/]|/[^*])+\*/'
     RE_INT      = r'0|[1-9][0-9]*|0b[0-1]+|0o[0-7]+|0[xX][0-9a-fA-F]+'
     # RE_FLOAT    = r'([0-9]*\.[0-9]+|[0-9]+\.[0-9]*|[0-9]+)([eE][+-]?{INT})?f?'
 
-    RE_KW       = r'\b(else|fun|if|let|rec|set|this)\b'
+    RE_KW       = r'\b(else|fun|if|let|rec|set|this|null|true|false)\b'
     RE_IDENT    = r'[\w_][\w0-9_]*'
     RE_ATTR     = r'({})\s*(?:@({}))?\s*:'.format(RE_IDENT, RE_IDENT)
     RE_VAR      = r'\$(\.)?' + RE_IDENT
@@ -253,12 +261,20 @@ class JuiLexer(NaiveRegexLexer):
     ]
     TOKEN_DISCARD = lambda t: t.type in [JuiLexer.T.WS, JuiLexer.T.COMMENT]
 
+    def __init__(self, input, inputFilename, *, keepUnitTests=False):
+        if keepUnitTests:
+            unit_rule = (self.RE_UTMARKER, JuiLexer.T.UNIT_TEST_MARKER, None, 1)
+            # Rebind on the instance: insert() would grow the class-wide list
+            self.TOKEN_REGEX = [unit_rule, *self.TOKEN_REGEX]
+        super().__init__(input, inputFilename)
+
 @dataclass
 class Node:
     T = enum.Enum("T", [
         "LIT", "IDENT", "OP", "THIS", "PROJ", "CALL", "IF", "SCOPE",
         "RECORD", "REC_ATTR", "REC_VALUE",
-        "LET_DECL", "FUN_DECL", "REC_DECL", "SET_STMT"])
+        "LET_DECL", "FUN_DECL", "REC_DECL", "SET_STMT",
+        "UNIT_TEST"])
     ctor: T
     args: list[typing.Any]
 
@@ -480,7 +496,20 @@ class JuiParser(LL1Parser):
     def scope(self):
         isNone = lambda t: t is None
         entries = self.separatedList(self.scope_stmt, sep=";", term=isNone)
-        return Node(Node.T.SCOPE, entries)
+
+        # Pair each UNIT_TEST with the entry before it (stored as its args[0])
+        entries2 = []
+        i = 0
+        while i < len(entries):
+            if i < len(entries) - 1 and entries[i+1].ctor == Node.T.UNIT_TEST:
+                entries[i+1].args[0] = entries[i]  # subject of the test
+                entries2.append(entries[i+1])  # only the test node is kept
+                i += 2  # skip both the subject and its test
+            else:
+                entries2.append(entries[i])
+                i += 1
+
+        return Node(Node.T.SCOPE, entries2)
 
     def scope_stmt(self):
         match self.la.type, self.la.value:
@@ -490,5 +519,8 @@ class JuiParser(LL1Parser):
                 return self.fun_rec_decl()
             case JuiLexer.T.KW, "set":
                 return self.set_stmt()
+            case JuiLexer.T.UNIT_TEST_MARKER, _:
+                self.expect(JuiLexer.T.UNIT_TEST_MARKER)
+                return Node(Node.T.UNIT_TEST, [None, self.expr()])
             case _:
                 return self.expr()