Schema for the XML format

For reference, here’s the RELAX-NG schema for the XML serialization format:
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Copyright 2013 David Malcolm <dmalcolm@redhat.com>
    Copyright 2013 Red Hat, Inc.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
    USA
-->
<grammar xmlns="http://relaxng.org/ns/structure/1.0"
         datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
  <start>
    <!--
       Document root: the results from an invocation of an analysis tool.
       Its content, in order, is the metadata, the list of results, and an
       optional set of custom fields.
    -->
    <element name="analysis">
      <ref name="metadata-element"/>

      <!-- Zero or more results; issues, failures and infos may be
           freely intermixed -->
      <element name="results">
        <zeroOrMore>
          <choice>
            <ref name="issue-element"/>
            <ref name="failure-element"/>
            <ref name="info-element"/>
          </choice>
        </zeroOrMore>
      </element>

      <optional>
        <ref name="custom-fields-element"/>
      </optional>
    </element>
  </start>

  <define name="metadata-element">
    <!-- Describes the analysis run: the tool that generated the results,
         and optionally the software it was run against, a file, and
         timing statistics. -->
    <element name="metadata">

      <!-- The analysis tool: its name is mandatory, its version is not -->
      <element name="generator">
        <attribute name="name"/>
        <optional>
          <attribute name="version"/>
        </optional>
      </element>

      <!-- The "sut" (Software Under Test), if supplied, is described by
           exactly one of the package elements below -->
      <optional>
        <element name="sut">
          <choice>

            <!-- A source RPM; all four attributes are required -->
            <element name="source-rpm">
              <attribute name="name"/>
              <attribute name="version"/>
              <attribute name="release"/>
              <attribute name="build-arch"/>
            </element>

            <!-- A Debian source package.  There is no build-arch here,
                 since source is architecture-independent. -->
            <element name="debian-source">
              <attribute name="name"/>
              <attribute name="version"/>
              <!-- "release" may be omitted because Debian packages can be
                   `native' (i.e. there is no local version, since local
                   and upstream are the same) -->
              <optional>
                <attribute name="release"/>
              </optional>
            </element>

            <!-- A Debian binary (.deb) package.  Valid build-arch values
                 include `amd64', `kfreebsd-amd64', `armhf' and
                 `hurd-i386', among others for Debian. -->
            <element name="debian-binary">
              <attribute name="name"/>
              <attribute name="version"/>
              <optional>
                <attribute name="release"/>
              </optional>
              <attribute name="build-arch"/>
            </element>

            <!-- What other options should we have? -->
          </choice>
        </element>
      </optional>

      <!-- Optionally, a file (see "file-element"); presumably the file
           that was analyzed.  TODO: confirm -->
      <optional>
        <ref name="file-element"/>
      </optional>

      <!-- Optionally, statistics about the run -->
      <optional>
        <element name="stats">
          <!-- Actual (wall-clock) time taken to run the analysis,
               in seconds -->
          <attribute name="wall-clock-time">
            <data type="float"/>
          </attribute>
        </element>
      </optional>

    </element>
  </define>

  <!--
    Definitions of the various kinds of result follow:
       <issue>
       <failure>
       <info>
  -->

  <!-- A report about a possible problem -->
  <define name="issue-element">
    <!--
        Content of an <issue>: three optional attributes (cwe, test-id,
        severity) followed, in order, by a required <message>, optional
        <notes>, a required <location>, an optional <trace> and optional
        <custom-fields>.
    -->
    <element name="issue">
      <optional>
        <!-- The Common Weakness Enumeration ID
             (see http://cwe.mitre.org/index.html )
             e.g. "131" representing CWE-131
             aka "Incorrect Calculation of Buffer Size"
             http://cwe.mitre.org/data/definitions/131.html
        -->
        <attribute name="cwe">
          <data type="integer"/>
        </attribute>
      </optional>

      <optional>
        <!--
            Each static analysis tool potentially has multiple tests,
            with its own IDs for its own tests.
            Capture those that do here, as free-form strings:
        -->
        <attribute name="test-id"/>
      </optional>

      <optional>
        <!--
            Each static analysis tool potentially can report a "severity",
            which may be of use for filtering.

            The precise strings are likely to vary from tool to tool.
            To avoid data-transfer issues, support storing it as an optional
            freeform string here.

            See:
            http://lists.fedoraproject.org/pipermail/firehose-devel/2013-February/000001.html
        -->
        <attribute name="severity"/>
      </optional>

      <!-- A message summarizing the problem -->
      <ref name="message-element"/>

      <!-- Additional descriptive details -->
      <optional>
        <ref name="notes-element"/>
      </optional>

      <!-- Where is the problem? -->
      <ref name="location-element"/>

      <!-- How can the problem occur? -->
      <optional>
        <ref name="trace-element"/>
      </optional>

      <!--
         A given tool/testid may have additional key/value pairs that it
         may be useful to capture:
      -->
      <optional>
        <ref name="custom-fields-element"/>
      </optional>

    </element>
  </define>

  <!--
      Free-form descriptive text, used both directly within an <issue> and
      within each <state> of a <trace>.
      This might support some simple markup at some point
      (as might <message>)
  -->
  <define name="notes-element">
    <element name="notes"><text/></element>
  </define>

  <!--
      A sequence of one or more states showing how the problem reported
      by an <issue> can occur.
  -->
  <define name="trace-element">
    <element name="trace">
      <oneOrMore>
        <element name="state">
          <ref name="location-element"/>
          <optional>
            <ref name="notes-element"/>
          </optional>
          <!-- optionally we can supply key-value pairs -->
          <zeroOrMore>
            <element name="annotation">
              <element name="key"><text/></element>
              <element name="value"><text/></element>
            </element>
          </zeroOrMore>
        </element>
      </oneOrMore>
    </element>
  </define>

  <!--
      A report about a failed analysis.

      If any of these are present then we don't have full coverage.

      For some analyzers this is an all-or-nothing affair: we either
      get issues reported, or a failure happens (e.g. a segfault of the
      analysis tool).

      Other analyzers may be more fine-grained: able to report some issues,
      but choke on some subset of the code under analysis.  For example
      cpychecker runs once per function, and any unhandled Python exceptions
      only affect one function.
  -->
  <define name="failure-element">
    <element name="failure">

      <!--
          An analysis tool may be able to classify the ways in which it
          can fail.  Where it can, capture that classification here as an
          (optional) free-form string.
      -->
      <optional>
        <attribute name="failure-id"/>
      </optional>

      <!--
          Some analysis tools may be able to annotate a failure report
          with the location *within the software-under-test* that broke
          them.

          For example, gcc-python-plugin has a gcc.set_location() method
          which a code analysis script can use to record the location
          being analyzed, so that if an unhandled Python exception
          happens, it is reported at that location.  This is invaluable
          when debugging analysis failures.
      -->
      <optional>
        <ref name="location-element"/>
      </optional>

      <!-- A summary of the failure -->
      <optional>
        <ref name="message-element"/>
      </optional>

      <!--
          Every type of failure seems to have its own kinds of data that
          are worth capturing, for example:
            * stdout/stderr/returncode for a failed subprocess
            * traceback for an unhandled Python exception
            * verbose extra information about a cppcheck failure
          Hence a <failure> may optionally contain extra key/value pairs,
          based on the failure-id.
      -->
      <optional>
        <ref name="custom-fields-element"/>
      </optional>

    </element>
  </define>

  <!--
     Sometimes you may want a tool to report other kinds of information
     about the software-under-test that isn't a problem as such, e.g.
     code metrics, copyright/license info, cross-referencing information
     etc, hence the <info> element:
  -->
  <define name="info-element">
    <!-- Every part of an <info> is optional -->
    <element name="info">

      <!-- A free-form string identifying the kind of information being
           reported -->
      <optional>
        <attribute name="info-id"/>
      </optional>

      <!-- The source code location that the information relates to -->
      <optional>
        <ref name="location-element"/>
      </optional>

      <!-- Summary text -->
      <optional>
        <ref name="message-element"/>
      </optional>

      <!-- Arbitrary extra key/value pairs -->
      <optional>
        <ref name="custom-fields-element"/>
      </optional>

    </element>
  </define>

  <!-- ...end of result definitions.  Various supporting elements follow: -->

  <!--
     Summary text aimed at a developer.  This is required for an <issue>,
     but can also (optionally) be provided by a <failure> or <info>.
  -->
  <define name="message-element">
    <!-- A <message> holds plain text only: no attributes, no child
         elements -->
    <element name="message">
      <text/>
    </element>
  </define>

  <define name="location-element">
    <!-- A particular source code location: a file, optionally a function,
         and then either a single point or a range within the file -->
    <element name="location">

      <ref name="file-element"/>

      <!--
           The function is optional.

           Ideally, every analyzer would tell us in which function each
           problem was discovered, given that function names are less
           likely to change than line numbers.  Unfortunately many don't,
           and we should patch these in each upstream analyzer as we go.

           Also, a problem can occur in global scope, e.g. lack of NULL
           termination in an array-initializer for a global, such as in
           this checker:
           http://gcc-python-plugin.readthedocs.org/en/latest/cpychecker.html#verification-of-pymethoddef-tables
           (although arguably there *is* a relevant function there: the
           location of the code that uses that data).
      -->
      <optional>
        <element name="function">
          <attribute name="name"/>
        </element>
      </optional>

      <!-- Either one point, or a range of locations within the file
           given as a pair of points -->
      <choice>
        <ref name="point-element"/>
        <element name="range">
          <ref name="point-element"/> <!-- start of range -->
          <ref name="point-element"/> <!-- end of range -->
        </element>
      </choice>

    </element>
  </define>

  <define name="file-element">
    <element name="file">

      <!--
          The filename as given by the analyzer.  Typically this is the
          one supplied to it on the command line, so it may be either
          absolute or relative.

          Examples:
          - "foo.c"
          - "./src/foo.c"
          - "/home/david/libfoo-1.0/src/foo.c"
      -->
      <attribute name="given-path"/>

      <!--
          The absolute path of the file may optionally be recorded too,
          to help deal with collating results from a build that changes
          working directory (e.g. recursive make)
      -->
      <optional>
        <attribute name="absolute-path"/>
      </optional>

      <!-- An optional hash, given as an algorithm ("alg") plus a
           "hexdigest"; presumably a digest of the file's content.
           TODO: confirm -->
      <optional>
        <element name="hash">
          <attribute name="alg"/>
          <attribute name="hexdigest"/>
        </element>
      </optional>

    </element>
  </define>

  <define name="point-element">
    <!--
        A single position within a file, given as a line and a column.
        Both attributes are required.  They are constrained to integers
        (like the other numeric attributes in this schema, e.g. "cwe"),
        so that non-numeric values are rejected at validation time rather
        than by whatever consumes the document.
    -->
    <element name="point">
      <attribute name="line">
        <data type="integer"/>
      </attribute>
      <attribute name="column">
        <data type="integer"/>
      </attribute>
    </element>
  </define>

  <!--
     A big escape-hatch in the schema: support for arbitrary, ordered
     key/value pairs for roundtripping data specific to a particular
     situation.  e.g. debugging attributes for a particular failure
  -->
  <define name="custom-fields-element">
    <!-- Zero or more named fields, of either kind, in any order -->
    <element name="custom-fields">
      <zeroOrMore>
        <choice>

          <!-- A named field whose content is free text -->
          <element name="str-field">
            <attribute name="name"/>
            <text/>
          </element>

          <!-- A named field whose content must be an integer -->
          <element name="int-field">
            <attribute name="name"/>
            <data type="integer"/>
          </element>

        </choice>
      </zeroOrMore>
    </element>
  </define>

</grammar>