[WIP] Clean up input format versions

ldionne · ldionne · commit de8ab24c86c7 · 2025-10-03T16:45:03.000-04:00
The code representing reports (and related entities like machines, runs, etc.) was greatly complicated by the presence of multiple input format versions. In particular, the "v2" input format was introduced but several parts of the codebase (e.g. `lnt importreport`) were never switched to it and were still producing reports in the "old" format. This patch simplifies the code by un-versioning the Python objects that represent Report-related entities and moving tests and other tools to produce the latest input format. However, old input formats are still supported in the sense that `lnt import` and the various import methods still accept old JSON formats, which are upgraded to the latest format before actually being submitted. TODO: - Fix the tests so they use the latest input format. - Look through anything that is still producing the old format (hint: search for elements of the old format, such as old classes like TestSamples) - Fix remaining mentions of report_version - Look for other mentions of the report version format - Look through https://reviews.llvm.org/D65751 and https://reviews.llvm.org/D34584 - Add tests for the JSON upgrade from V1 to V2 instead of having all kinds of tests for each Python class
diff --git a/lnt/lnttool/import_report.py b/lnt/lnttool/import_report.py
@@ -28,24 +28,29 @@ def action_importreport(input, output, suite, order, machine):
     import lnt.testing
     import os
 
-    machine_info = {}
-    run_info = {'tag': suite}
-    run_info['run_order'] = order
-    machine = lnt.testing.Machine(machine,
-                                  machine_info)
+    machine = lnt.testing.Machine(machine)
+
     ctime = os.path.getctime(input.name)
     mtime = os.path.getmtime(input.name)
+    run = lnt.testing.Run(start_time=ctime, end_time=mtime,
+                          info={'llvm_project_revision': order})
 
-    run = lnt.testing.Run(ctime, mtime, run_info)
-    report = lnt.testing.Report(machine=machine, run=run, tests=[])
-
+    tests = {} # name => lnt.testing.Test
     for line in input.readlines():
         key, val = line.split()
-        metric = key.split(".")[1]
+        (testname, metric) = key.split(".")
         metric_type = float if metric not in ("hash", "profile") else str
-        test = lnt.testing.TestSamples(suite + "." + key, [val],
-                                       conv_f=metric_type)
 
-        report.tests.extend([test])
+        if testname not in tests:
+            tests[testname] = lnt.testing.Test(testname, [])
+        test = tests[testname]
+
+        samples = next((s for s in test.samples if s.metric == metric), None)
+        if samples is None:
+            test.samples.append(lnt.testing.MetricSamples(metric, []))
+            samples = test.samples[-1]
+
+        samples.add_samples([val], conv_f=metric_type)
 
+    report = lnt.testing.Report(machine=machine, run=run, tests=list(tests.values()))
     output.write(report.render())
diff --git a/lnt/testing/__init__.py b/lnt/testing/__init__.py
@@ -32,40 +32,21 @@ class Report:
     In the LNT test model, every test run should define exactly one
     machine and run, and any number of test samples.
     """
-    def __init__(self, machine, run, tests, report_version=1):
-        """Construct a LNT report file format in the given format version."""
+    def __init__(self, machine, run, tests):
+        """Construct a LNT report file."""
         self.machine = machine
         self.run = run
         self.tests = list(tests)
-        self.report_version = report_version
         self.check()
 
     def check(self):
         """Check that object members are adequate to generate an LNT
-        json report file of the version specified at construction when
-        rendering that instance.
+        json report file.
         """
-        # Check requested report version is supported by this library
-        assert self.report_version <= 2, "Only v2 or older LNT report format supported."
-
         assert isinstance(self.machine, Machine), "Unexpected type for machine."
-        assert (
-            self.machine.report_version == self.report_version
-        ), "Mismatch between machine and report version."
-
         assert isinstance(self.run, Run), "Unexpected type for run."
-        assert (
-            self.run.report_version == self.report_version
-        ), "Mismatch between run and report version."
-
         for t in self.tests:
-            if self.report_version == 2:
-                assert isinstance(t, Test), "Unexpected type for test"
-                assert (
-                    t.report_version == self.report_version
-                ), "Mismatch between test and report version."
-            else:
-                assert isinstance(t, TestSamples), "Unexpected type for test samples."
+            assert isinstance(t, Test), "Unexpected type for test"
 
     def update_report(self, new_tests_samples, end_time=None):
         """Add extra samples to this report, and update the end time of
@@ -77,21 +58,14 @@ def update_report(self, new_tests_samples, end_time=None):
         self.check()
 
     def render(self, indent=4):
-        """Return a LNT json report file format of the version specified
-        at construction as a string, where each object is indented by
-        indent spaces compared to its parent.
+        """Return a LNT json report file as a string, where each object is
+        indented by `indent` spaces compared to its parent.
         """
-        if self.report_version == 2:
-            return json.dumps({'format_version': str(self.report_version),
-                               'machine': self.machine.render(),
-                               'run': self.run.render(),
-                               'tests': [t.render() for t in self.tests]},
-                              sort_keys=True, indent=indent)
-        else:
-            return json.dumps({'Machine': self.machine.render(),
-                               'Run': self.run.render(),
-                               'Tests': [t.render() for t in self.tests]},
-                              sort_keys=True, indent=indent)
+        return json.dumps({'format_version': '2',
+                            'machine': self.machine.render(),
+                            'run': self.run.render(),
+                            'tests': [t.render() for t in self.tests]},
+                            sort_keys=True, indent=indent)
 
 
 class Machine:
@@ -104,44 +78,25 @@ class Machine:
     Machines entries in the database are uniqued by their name and the
     entire contents of the info dictionary.
     """
-    def __init__(self, name, info={}, report_version=1):
+    def __init__(self, name, info={}):
         self.name = str(name)
         self.info = dict((str(key), str(value))
                          for key, value in info.items())
-        self.report_version = report_version
-        self.check()
-
-    def check(self):
-        """Check object members are adequate to generate an LNT json
-        report file of the version specified at construction when
-        rendering that instance.
-        """
-        # Check requested version is supported by this library
-        assert (
-            self.report_version <= 2
-        ), "Only v2 or older supported for LNT report format Machine objects."
 
     def render(self):
         """Return info from this instance in a dictionary that respects
-        the LNT report format in the version specified at construction
-        when printed as json.
+        the LNT JSON report format.
         """
-        if self.report_version == 2:
-            d = dict(self.info)
-            d['Name'] = self.name
-            return d
-        else:
-            return {'Name': self.name,
-                    'Info': self.info}
+        d = dict(self.info)
+        d['name'] = self.name
+        return d
 
 
 class Run:
     """Information on the particular test run.
 
     At least one parameter must be supplied and is used as ordering
-    among several runs. When generating a report in format 1 or earlier,
-    both start_time and end_time are used for that effect and the
-    current date is used if their value is None.
+    among several runs.
 
     As with Machine, the info dictionary can be used to describe
     additional information on the run. This dictionary should be used to
@@ -151,12 +106,7 @@ class Run:
     which could be useful in analysis, for example the current machine
     load.
     """
-    def __init__(self, start_time=None, end_time=None, info={}, report_version=1):
-        if report_version <= 1:
-            if start_time is None:
-                start_time = datetime.datetime.utcnow()
-            if end_time is None:
-                end_time = datetime.datetime.utcnow()
+    def __init__(self, start_time=None, end_time=None, info={}):
         self.start_time = normalize_time(start_time) if start_time is not None else None
         self.end_time = normalize_time(end_time) if end_time is not None else None
         self.info = dict()
@@ -165,68 +115,32 @@ def __init__(self, start_time=None, end_time=None, info={}, report_version=1):
             key = str(key)
             value = str(value)
             self.info[key] = value
-        self.report_version = report_version
-        if self.report_version <= 1:
-            if 'tag' not in self.info:
-                raise ValueError("Missing 'tag' entry in 'info' dictionary")
-            if 'run_order' not in self.info:
-                raise ValueError("Missing 'run_order' entry in 'info' dictionary")
-        else:
-            if 'llvm_project_revision' not in self.info:
-                raise ValueError("Missing 'llvm_project_revision' entry in 'info' dictionary")
-        if '__report_version__' in info:
-            raise ValueError("'__report_version__' key is reserved")
-        if report_version == 1:
-            self.info['__report_version__'] = '1'
-        self.check()
 
-    def check(self):
-        """Check object members are adequate to generate an LNT json
-        report file of the version specified at construction when
-        rendering that instance.
-        """
-        # Check requested version is supported by this library
-        assert (
-            self.report_version <= 2
-        ), "Only v2 or older supported for LNT report format Run objects."
-        if self.start_time is None and self.end_time is None and not bool(self.info):
-            raise ValueError("No data defined in this Run")
+        if 'llvm_project_revision' not in self.info:
+            raise ValueError("Missing 'llvm_project_revision' entry in 'info' dictionary")
 
     def update_endtime(self, end_time=None):
         """Update the end time of this run."""
-        if self.report_version <= 1 and end_time is None:
-            end_time = datetime.datetime.utcnow()
         self.end_time = normalize_time(end_time) if end_time else None
-        self.check()
 
     def render(self):
         """Return info from this instance in a dictionary that respects
-        the LNT report format in the version specified at construction
-        when printed as json.
+        the LNT JSON report format.
         """
-        if self.report_version == 2:
-            d = dict(self.info)
-            if self.start_time is not None:
-                d['start_time'] = self.start_time
-            if self.end_time is not None:
-                d['end_time'] = self.end_time
-            return d
-        else:
-            info = dict(self.info)
-            if self.report_version == 1:
-                info['__report_version__'] = '1'
-            return {'Start Time': self.start_time,
-                    'End Time': self.end_time,
-                    'Info': info}
+        d = dict(self.info)
+        if self.start_time is not None:
+            d['start_time'] = self.start_time
+        if self.end_time is not None:
+            d['end_time'] = self.end_time
+        return d
 
 
 class Test:
     """Information on a particular test in the run and its associated
     samples.
 
     The server automatically creates test database objects whenever a
-    new test name is seen. Test should be used to generate report in
-    version 2 or later of LNT JSON report file format.
+    new test name is seen.
 
     Test names are intended to be a persistent, recognizable identifier
     for what is being executed. Currently, most formats use some form of
@@ -242,10 +156,10 @@ class Test:
     for example, the compile flags the test was built with, or the
     runtime parameters that were used. As a general rule, if two test
     samples are meaningfully and directly comparable, then they should
-    have the same test name but different info paramaters.
+    have the same test name but different info parameters.
     """
 
-    def __init__(self, name, samples, info={}, report_version=2):
+    def __init__(self, name, samples, info={}):
         self.name = name
         self.samples = samples
         self.info = dict()
@@ -254,33 +168,22 @@ def __init__(self, name, samples, info={}, report_version=2):
             key = str(key)
             value = str(value)
             self.info[key] = value
-        self.report_version = report_version
         self.check()
 
     def check(self):
         """Check object members are adequate to generate an LNT json
-        report file of the version specified at construction when
-        rendering that instance.
+        report file.
         """
-        # Check requested version is supported by this library and is
-        # valid for this object.
-        assert (
-            self.report_version == 2
-        ), "Only v2 supported for LNT report format Test objects."
         for s in self.samples:
             assert isinstance(s, MetricSamples), "Unexpected type for metric sample."
-            assert (
-                s.report_version == self.report_version
-            ), "Mismatch between test and metric samples."
 
     def render(self):
         """Return info from this instance in a dictionary that respects
-        the LNT report format in the version specified at construction
-        when printed as json.
+        the LNT JSON report format.
         """
         d = dict(self.info)
         d.update([s.render().popitem() for s in self.samples])
-        d['Name'] = self.name
+        d['name'] = self.name
         return d
 
 
@@ -309,7 +212,7 @@ class TestSamples:
     for example, the compile flags the test was built with, or the
     runtime parameters that were used. As a general rule, if two test
     samples are meaningfully and directly comparable, then they should
-    have the same test name but different info paramaters.
+    have the same test name but different info parameters.
 
     The report may include an arbitrary number of samples for each test
     for situations where the same test is run multiple times to gather
@@ -347,27 +250,11 @@ class MetricSamples:
     An arbitrary number of samples for a given metric is allowed for
     situations where the same metric is obtained several time for a
     given test to gather statistical data.
-
-    MetricSamples should be used to generate report in version 2 or
-    later of LNT JSON report file format.
     """
 
-    def __init__(self, metric, data, conv_f=float, report_version=2):
+    def __init__(self, metric, data, conv_f=float):
         self.metric = str(metric)
         self.data = list(map(conv_f, data))
-        self.report_version = report_version
-        self.check()
-
-    def check(self):
-        """Check object members are adequate to generate an LNT json
-        report file of the version specified at construction when
-        rendering that instance.
-        """
-        # Check requested version is supported by this library and is
-        # valid for this object.
-        assert (
-            self.report_version == 2
-        ), "Only v2 supported for LNT report format MetricSamples objects."
 
     def add_samples(self, new_samples, conv_f=float):
         """Add samples for this metric, converted to float by calling
@@ -389,8 +276,10 @@ def render(self):
 # We record information on the report "version" to allow the server to support
 # some level of auto-upgrading data from submissions of older reports.
 #
-# We recorder the report version as a reserved key in the run information
-# (primarily so that it can be accessed post-import on the server).
+# We record the report version as a reserved key in the run information. When
+# importing data, we detect the version of the report using the version number
+# and we normalize it to the latest format so that the rest of the code only
+# has to deal with the latest version at all times.
 #
 # Version 0 --           : initial (and unversioned).
 #