aboutsummaryrefslogtreecommitdiff
path: root/lib/parsers/llvm-pass-dump-parser.ts
diff options
context:
space:
mode:
authorJeremy Rifkin <51220084+jeremy-rifkin@users.noreply.github.com>2022-06-30 19:53:02 -0400
committerGitHub <noreply@github.com>2022-07-01 01:53:02 +0200
commited1a4e6b2794e096ecf049a64ed4944733f81b94 (patch)
treecc25548f69a2b00ed561ca61ba091ec299739b27 /lib/parsers/llvm-pass-dump-parser.ts
parentd0a7a22bfa7503547c9d5c35c3d9e24e6dd311ad (diff)
downloadcompiler-explorer-ed1a4e6b2794e096ecf049a64ed4944733f81b94.tar.gz
compiler-explorer-ed1a4e6b2794e096ecf049a64ed4944733f81b94.zip
LLVM Optimization Pass Viewer (#3769)gh-3514
Diffstat (limited to 'lib/parsers/llvm-pass-dump-parser.ts')
-rw-r--r--lib/parsers/llvm-pass-dump-parser.ts449
1 files changed, 449 insertions, 0 deletions
diff --git a/lib/parsers/llvm-pass-dump-parser.ts b/lib/parsers/llvm-pass-dump-parser.ts
new file mode 100644
index 000000000..9e8d5f03d
--- /dev/null
+++ b/lib/parsers/llvm-pass-dump-parser.ts
@@ -0,0 +1,449 @@
+// Copyright (c) 2022, Compiler Explorer Authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+import _ from 'underscore';
+
+import {
+ LLVMOptPipelineBackendOptions,
+ LLVMOptPipelineOutput,
+ Pass,
+} from '../../types/compilation/llvm-opt-pipeline-output.interfaces';
+import {ParseFilters} from '../../types/features/filters.interfaces';
+import {ResultLine} from '../../types/resultline/resultline.interfaces';
+import {logger} from '../logger';
+
+// Note(jeremy-rifkin):
+// For now this filters out a bunch of metadata we aren't interested in
+// Maybe at a later date we'll want to allow user-controllable filters
+// It'd be helpful to better display annotations about branch weights
+// and parse debug info too at some point.
+
+// TODO(jeremy-rifkin): Doe we already have an assert utility
+function assert(condition: boolean, message?: string, ...args: any[]): asserts condition {
+ if (!condition) {
+ const error = message
+ ? `Assertion error in llvm-print-after-all-parser: ${message}`
+ : `Assertion error in llvm-print-after-all-parser`;
+ logger.error(error + ' ' + JSON.stringify(args));
+ throw error;
+ }
+}
+
+// Ir Dump for a pass with raw lines
+type PassDump = {
+ header: string;
+ affectedFunction: string | undefined;
+ machine: boolean;
+ lines: ResultLine[];
+};
+// Ir Dump for a pass with raw lines broken into affected functions (or "<loop>")
+type SplitPassDump = {
+ header: string;
+ machine: boolean;
+ functions: Record<string, ResultLine[]>;
+};
+
+export class LlvmPassDumpParser {
+ filters: RegExp[];
+ lineFilters: RegExp[];
+ irDumpHeader: RegExp;
+ machineCodeDumpHeader: RegExp;
+ functionDefine: RegExp;
+ machineFunctionBegin: RegExp;
+ functionEnd: RegExp;
+ //label: RegExp;
+ //instruction: RegExp;
+
+ constructor(compilerProps) {
+ //this.maxIrLines = 5000;
+ //if (compilerProps) {
+ // this.maxIrLines = compilerProps('maxLinesOfAsm', this.maxIrLines);
+ //}
+
+ this.filters = [
+ /^; ModuleID = '.+'$/, // module id line
+ /^(source_filename|target datalayout|target triple) = ".+"$/, // module metadata
+ /^; Function Attrs: .+$/, // function attributes
+ /^\s+call void @llvm.dbg.value.+$/, // dbg calls
+ /^\s+call void @llvm.dbg.declare.+$/, // dbg calls
+ /^declare .+$/, // declare directives
+ /^(!\d+) = (?:distinct )?!DI([A-Za-z]+)\(([^)]+?)\)/, // meta
+ /^(!\d+) = (?:distinct )?!{.*}/, // meta
+ /^(![.A-Z_a-z-]+) = (?:distinct )?!{.*}/, // meta
+ /^attributes #\d+ = { .+ }$/, // attributes directive
+ ];
+ this.lineFilters = [
+ /,? ![\dA-Za-z]+((?=( {)?$))/, // debug annotation
+ /,? #\d+((?=( {)?$))/, // attribute annotation
+ ];
+
+ // Ir dump headers look like "*** IR Dump After XYZ ***"
+ // Machine dump headers look like "# *** IR Dump After XYZ ***:", possibly with a comment or "(function: xyz)"
+ // or "(loop: %x)" at the end
+ this.irDumpHeader = /^\*{3} (.+) \*{3}(?:\s+\((?:function: |loop: )(%?[\w$.]+)\))?(?:;.+)?$/;
+ this.machineCodeDumpHeader = /^# \*{3} (.+) \*{3}:$/;
+ // Ir dumps are "define T @_Z3fooi(...) . .. {" or "# Machine code for function _Z3fooi: <attributes>"
+ // Some interesting edge cases found when testing:
+ // `define internal %"struct.libassert::detail::assert_static_parameters"* @"_ZZ4mainENK3$_0clEv"(
+ // %class.anon* nonnull dereferenceable(1) %0) #5 align 2 !dbg !2 { ... }`
+ // `define internal void @__cxx_global_var_init.1() #0 section ".text.startup" {`
+ this.functionDefine = /^define .+ @([\w.]+|"[^"]+")\(.+$/;
+ this.machineFunctionBegin = /^# Machine code for function ([\w$.]+):.*$/;
+ // Functions end with either a closing brace or "# End machine code for function _Z3fooi."
+ this.functionEnd = /^(?:}|# End machine code for function ([\w$.]+).)$/;
+ // Either "123:" with a possible comment or something like "bb.3 (%ir-block.13):"
+ //this.label = /^(?:\d+:(\s+;.+)?|\w+.+:)$/;
+ //this.instruction = /^\s+.+$/;
+ }
+
+ breakdownOutputIntoPassDumps(ir: ResultLine[]) {
+ // break down output by "*** IR Dump After XYZ ***" markers
+ const raw_passes: PassDump[] = [];
+ let pass: PassDump | null = null;
+ let lastWasBlank = false; // skip duplicate blank lines
+ for (const line of ir) {
+ // stop once the machine code passes start, can't handle these yet
+ //if (this.machineCodeDumpHeader.test(line.text)) {
+ // break;
+ //}
+ const irMatch = line.text.match(this.irDumpHeader);
+ const machineMatch = line.text.match(this.machineCodeDumpHeader);
+ const header = irMatch || machineMatch;
+ if (header) {
+ if (pass !== null) {
+ raw_passes.push(pass);
+ }
+ pass = {
+ header: header[1],
+ // in dump full module mode some headers are annotated for what function (or loop) they operate on
+ // if we aren't in full module mode or this is a header that isn't function/loop specific this will
+ // be undefined
+ affectedFunction: header[2],
+ machine: !!machineMatch,
+ lines: [],
+ };
+ lastWasBlank = true; // skip leading newlines after the header
+ } else {
+ if (pass === null) {
+ throw 'Internal error during breakdownOutput (1)';
+ }
+ if (line.text.trim() === '') {
+ if (!lastWasBlank) {
+ pass.lines.push(line);
+ }
+ lastWasBlank = true;
+ } else {
+ pass.lines.push(line);
+ lastWasBlank = false;
+ }
+ }
+ }
+ if (pass !== null) {
+ raw_passes.push(pass);
+ }
+ return raw_passes;
+ }
+
+ breakdownPassDumpsIntoFunctions(dump: PassDump) {
+ // break up down dumps for each pass into functions (or absence of functions in the case of loop passes)
+ // we have three cases of ir dumps to consider:
+ // - Most passes dump a single function
+ // - Some passes dump every function
+ // - Some loop passes only dump loops - these are challenging to deal with
+ const pass: SplitPassDump = {
+ header: dump.header,
+ machine: dump.machine,
+ functions: {},
+ };
+ let func: {
+ name: string;
+ lines: ResultLine[];
+ } | null = null;
+ for (const line of dump.lines) {
+ const irFnMatch = line.text.match(this.functionDefine);
+ const machineFnMatch = line.text.match(this.machineFunctionBegin);
+ // function define line
+ if (irFnMatch || machineFnMatch) {
+ // if the last function has not been closed...
+ if (func !== null) {
+ throw 'Internal error during breakdownPass (1)';
+ }
+ func = {
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+ name: (irFnMatch || machineFnMatch)![1],
+ lines: [line], // include the current line
+ };
+ } else if (line.text.startsWith('; Preheader:')) {
+ // loop dump
+ // every line in this dump should be part of the loop, exit condition will be end of the for loop
+ assert(func === null);
+ func = {
+ name: '<loop>',
+ lines: [line], // include the current line
+ };
+ } else {
+ // close function
+ if (this.functionEnd.test(line.text.trim())) {
+ // if not currently in a function
+ if (func === null) {
+ throw 'Internal error during breakdownPass (2)';
+ }
+ const {name, lines} = func;
+ lines.push(line); // include the }
+ // loop dumps can't be terminated with }
+ if (name === '<loop>') {
+ throw 'Internal error during breakdownPass (3)';
+ }
+ // somehow dumped twice?
+ if (name in pass.functions) {
+ throw 'Internal error during breakdownPass (4)';
+ }
+ pass.functions[name] = lines;
+ func = null;
+ } else {
+ // lines outside a function definition
+ if (func === null) {
+ if (line.text.trim() === '') {
+ // may be a blank line
+ continue;
+ } else {
+ ///console.log('ignoring ------>', line.text);
+ // ignore
+ continue;
+ }
+ }
+ func.lines.push(line);
+ }
+ }
+ }
+ // unterminated function, either a loop dump or an error
+ if (func !== null) {
+ if (func.name === '<loop>') {
+ // loop dumps must be alone
+ if (Object.entries(pass.functions).length > 0) {
+ //console.dir(dump, { depth: 5, maxArrayLength: 100000 });
+ //console.log(pass.functions);
+ throw 'Internal error during breakdownPass (5)';
+ }
+ pass.functions[func.name] = func.lines;
+ } else {
+ throw 'Internal error during breakdownPass (6)';
+ }
+ }
+ return pass;
+ }
+
+ breakdownIntoPassDumpsByFunction(passDumps: SplitPassDump[]) {
+ // Currently we have an array of passes with a map of functions altered in each pass
+ // We want to transpose to a map of functions with an array of passes on the function
+ const passDumpsByFunction: Record<string, PassDump[]> = {};
+ // I'm assuming loop dumps should correspond to the previous function dumped
+ let previousFunction: string | null = null;
+ for (const pass of passDumps) {
+ const {header, machine, functions} = pass;
+ const functionEntries = Object.entries(functions);
+ for (const [function_name, lines] of functionEntries) {
+ const name: string | null = function_name === '<loop>' ? previousFunction : function_name;
+ assert(name !== null, 'Loop dump without preceding dump');
+ if (!(name in passDumpsByFunction)) {
+ passDumpsByFunction[name] = [];
+ }
+ passDumpsByFunction[name].push({
+ header,
+ affectedFunction: undefined,
+ machine,
+ lines,
+ });
+ }
+ if (functionEntries.length === 0) {
+ // This can happen as a result of "Printing <null> Function"
+ //throw 'Internal error during breakdownOutput (2)';
+ } else if (functionEntries.length === 1) {
+ const name = functionEntries[0][0];
+ if (name !== '<loop>') {
+ previousFunction = name;
+ }
+ } else {
+ previousFunction = null;
+ }
+ }
+ return passDumpsByFunction;
+ }
+
+ // used for full module dump mode
+ associateFullDumpsWithFunctions(passDumps: PassDump[]) {
+ // Currently we have an array of passes that'll have target annotations
+ const passDumpsByFunction: Record<string, PassDump[]> = {};
+ // First figure out what all the functions are
+ for (const pass of passDumps) {
+ // If it starts with a % it's a loop
+ if (pass.affectedFunction && !pass.affectedFunction.startsWith('%')) {
+ passDumpsByFunction[pass.affectedFunction] = [];
+ }
+ }
+ passDumpsByFunction['<Full Module>'] = [];
+ // I'm assuming loop dumps should correspond to the previous function dumped
+ //const functions = Object.keys(passDumpsByFunction);
+ let previousFunction: string | null = null;
+ for (const pass of passDumps) {
+ const {header, affectedFunction, machine, lines} = pass;
+ if (affectedFunction) {
+ let fn = affectedFunction;
+ if (affectedFunction.startsWith('%')) {
+ assert(previousFunction !== null);
+ fn = previousFunction;
+ }
+ assert(fn in passDumpsByFunction);
+ [passDumpsByFunction[fn], passDumpsByFunction['<Full Module>']].map(entry =>
+ entry.push({
+ header: `${header} (${fn})`,
+ affectedFunction: fn,
+ machine,
+ lines,
+ }),
+ );
+ previousFunction = fn;
+ } else {
+ // applies to everything
+ for (const [_, entry] of Object.entries(passDumpsByFunction)) {
+ entry.push({
+ header,
+ affectedFunction: undefined,
+ machine,
+ lines,
+ });
+ }
+ previousFunction = null;
+ }
+ }
+ return passDumpsByFunction;
+ }
+
+ matchPassDumps(passDumpsByFunction: Record<string, PassDump[]>) {
+ // We have all the passes for each function, now we will go through each function and match the before/after
+ // dumps
+ const final_output: LLVMOptPipelineOutput = {};
+ for (const [function_name, passDumps] of Object.entries(passDumpsByFunction)) {
+ // I had a fantastic chunk2 method to iterate the passes in chunks of 2 but I've been foiled by an edge
+ // case: At least the "Delete dead loops" may only print a before dump and no after dump
+ const passes: Pass[] = [];
+ // i incremented appropriately later
+ for (let i = 0; i < passDumps.length; ) {
+ const pass: Pass = {
+ name: '',
+ machine: false,
+ after: [],
+ before: [],
+ irChanged: true,
+ };
+ const current_dump = passDumps[i];
+ const next_dump = i < passDumps.length - 1 ? passDumps[i + 1] : null;
+ if (current_dump.header.startsWith('IR Dump After ')) {
+ // An after dump without a before dump - I don't think this can happen but we'll handle just in case
+ pass.name = current_dump.header.slice('IR Dump After '.length);
+ pass.after = current_dump.lines;
+ i++;
+ } else if (current_dump.header.startsWith('IR Dump Before ')) {
+ if (next_dump !== null && next_dump.header.startsWith('IR Dump After ')) {
+ assert(
+ current_dump.header.slice('IR Dump Before '.length) ===
+ next_dump.header.slice('IR Dump After '.length),
+ current_dump.header,
+ next_dump.header,
+ );
+ assert(current_dump.machine === next_dump.machine);
+ pass.name = current_dump.header.slice('IR Dump Before '.length);
+ pass.before = current_dump.lines;
+ pass.after = next_dump.lines;
+ i += 2;
+ } else {
+ // Before with no after - this can happen with Delete dead loops
+ pass.name = current_dump.header.slice('IR Dump Before '.length);
+ pass.before = current_dump.lines;
+ i++;
+ }
+ } else {
+ assert(false, 'Unexpected pass header', current_dump.header);
+ }
+ pass.machine = current_dump.machine;
+ // check for equality
+ pass.irChanged = pass.before.map(x => x.text).join('\n') !== pass.after.map(x => x.text).join('\n');
+ passes.push(pass);
+ }
+ //console.dir(passes, {
+ // depth: 5,
+ // maxArrayLength: 100000
+ //});
+ assert(!(function_name in final_output), 'xxx');
+ final_output[function_name] = passes;
+ }
+ return final_output;
+ }
+
+ breakdownOutput(ir: ResultLine[], llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) {
+ // break down output by "*** IR Dump After XYZ ***" markers
+ const raw_passes = this.breakdownOutputIntoPassDumps(ir);
+ if (llvmOptPipelineOptions.fullModule) {
+ const passDumpsByFunction = this.associateFullDumpsWithFunctions(raw_passes);
+ // Match before / after pass dumps and we're done
+ return this.matchPassDumps(passDumpsByFunction);
+ } else {
+ // Further break down by functions in each dump
+ const passDumps = raw_passes.map(this.breakdownPassDumpsIntoFunctions.bind(this));
+ // Transform array of passes containing multiple functions into a map from functions to arrays of passes on
+ // those functions
+ const passDumpsByFunction = this.breakdownIntoPassDumpsByFunction(passDumps);
+ // Match before / after pass dumps and we're done
+ return this.matchPassDumps(passDumpsByFunction);
+ }
+ }
+
+ process(ir: ResultLine[], _: ParseFilters, llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) {
+ // Filter a lot of junk before processing
+ const preprocessed_lines = ir
+ .slice(
+ ir.findIndex(line => line.text.match(this.irDumpHeader) || line.text.match(this.machineCodeDumpHeader)),
+ )
+ .filter(line => this.filters.every(re => line.text.match(re) === null)) // apply filters
+ .map(_line => {
+ let line = _line.text;
+ // eslint-disable-next-line no-constant-condition
+ while (true) {
+ let newLine = line;
+ for (const re of this.lineFilters) {
+ newLine = newLine.replace(re, '');
+ }
+ if (newLine === line) {
+ break;
+ } else {
+ line = newLine;
+ }
+ }
+ _line.text = line;
+ return _line;
+ });
+
+ return this.breakdownOutput(preprocessed_lines, llvmOptPipelineOptions);
+ }
+}