LLVM Optimization Pass Viewer (#3769)gh-3514

author: Jeremy Rifkin <51220084+jeremy-rifkin@users.noreply.github.com> 2022-06-30 19:53:02 -0400
committer: GitHub <noreply@github.com> 2022-07-01 01:53:02 +0200
commit: ed1a4e6b2794e096ecf049a64ed4944733f81b94 (patch)
tree: cc25548f69a2b00ed561ca61ba091ec299739b27 /lib/parsers/llvm-pass-dump-parser.ts
parent: d0a7a22bfa7503547c9d5c35c3d9e24e6dd311ad (diff)
download: compiler-explorer-ed1a4e6b2794e096ecf049a64ed4944733f81b94.tar.gz
compiler-explorer-ed1a4e6b2794e096ecf049a64ed4944733f81b94.zip
1 files changed, 449 insertions, 0 deletions
diff --git a/lib/parsers/llvm-pass-dump-parser.ts b/lib/parsers/llvm-pass-dump-parser.ts
new file mode 100644
index 000000000..9e8d5f03d
--- /dev/null
+++ b/lib/parsers/llvm-pass-dump-parser.ts
@@ -0,0 +1,449 @@
+// Copyright (c) 2022, Compiler Explorer Authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//     * Redistributions of source code must retain the above copyright notice,
+//       this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above copyright
+//       notice, this list of conditions and the following disclaimer in the
+//       documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+import _ from 'underscore';
+
+import {
+    LLVMOptPipelineBackendOptions,
+    LLVMOptPipelineOutput,
+    Pass,
+} from '../../types/compilation/llvm-opt-pipeline-output.interfaces';
+import {ParseFilters} from '../../types/features/filters.interfaces';
+import {ResultLine} from '../../types/resultline/resultline.interfaces';
+import {logger} from '../logger';
+
+// Note(jeremy-rifkin):
+// For now this filters out a bunch of metadata we aren't interested in
+// Maybe at a later date we'll want to allow user-controllable filters
+// It'd be helpful to better display annotations about branch weights
+// and parse debug info too at some point.
+
+// TODO(jeremy-rifkin): Doe we already have an assert utility
+function assert(condition: boolean, message?: string, ...args: any[]): asserts condition {
+    if (!condition) {
+        const error = message
+            ? `Assertion error in llvm-print-after-all-parser: ${message}`
+            : `Assertion error in llvm-print-after-all-parser`;
+        logger.error(error + ' ' + JSON.stringify(args));
+        throw error;
+    }
+}
+
+// Ir Dump for a pass with raw lines
+type PassDump = {
+    header: string;
+    affectedFunction: string | undefined;
+    machine: boolean;
+    lines: ResultLine[];
+};
+// Ir Dump for a pass with raw lines broken into affected functions (or "<loop>")
+type SplitPassDump = {
+    header: string;
+    machine: boolean;
+    functions: Record<string, ResultLine[]>;
+};
+
+export class LlvmPassDumpParser {
+    filters: RegExp[];
+    lineFilters: RegExp[];
+    irDumpHeader: RegExp;
+    machineCodeDumpHeader: RegExp;
+    functionDefine: RegExp;
+    machineFunctionBegin: RegExp;
+    functionEnd: RegExp;
+    //label: RegExp;
+    //instruction: RegExp;
+
+    constructor(compilerProps) {
+        //this.maxIrLines = 5000;
+        //if (compilerProps) {
+        //    this.maxIrLines = compilerProps('maxLinesOfAsm', this.maxIrLines);
+        //}
+
+        this.filters = [
+            /^; ModuleID = '.+'$/, // module id line
+            /^(source_filename|target datalayout|target triple) = ".+"$/, // module metadata
+            /^; Function Attrs: .+$/, // function attributes
+            /^\s+call void @llvm.dbg.value.+$/, // dbg calls
+            /^\s+call void @llvm.dbg.declare.+$/, // dbg calls
+            /^declare .+$/, // declare directives
+            /^(!\d+) = (?:distinct )?!DI([A-Za-z]+)\(([^)]+?)\)/, // meta
+            /^(!\d+) = (?:distinct )?!{.*}/, // meta
+            /^(![.A-Z_a-z-]+) = (?:distinct )?!{.*}/, // meta
+            /^attributes #\d+ = { .+ }$/, // attributes directive
+        ];
+        this.lineFilters = [
+            /,? ![\dA-Za-z]+((?=( {)?$))/, // debug annotation
+            /,? #\d+((?=( {)?$))/, // attribute annotation
+        ];
+
+        // Ir dump headers look like "*** IR Dump After XYZ ***"
+        // Machine dump headers look like "# *** IR Dump After XYZ ***:", possibly with a comment or "(function: xyz)"
+        // or "(loop: %x)" at the end
+        this.irDumpHeader = /^\*{3} (.+) \*{3}(?:\s+\((?:function: |loop: )(%?[\w$.]+)\))?(?:;.+)?$/;
+        this.machineCodeDumpHeader = /^# \*{3} (.+) \*{3}:$/;
+        // Ir dumps are "define T @_Z3fooi(...) . .. {" or "# Machine code for function _Z3fooi: <attributes>"
+        // Some interesting edge cases found when testing:
+        // `define internal %"struct.libassert::detail::assert_static_parameters"* @"_ZZ4mainENK3$_0clEv"(
+        //      %class.anon* nonnull dereferenceable(1) %0) #5 align 2 !dbg !2 { ... }`
+        // `define internal void @__cxx_global_var_init.1() #0 section ".text.startup" {`
+        this.functionDefine = /^define .+ @([\w.]+|"[^"]+")\(.+$/;
+        this.machineFunctionBegin = /^# Machine code for function ([\w$.]+):.*$/;
+        // Functions end with either a closing brace or "# End machine code for function _Z3fooi."
+        this.functionEnd = /^(?:}|# End machine code for function ([\w$.]+).)$/;
+        // Either "123:" with a possible comment or something like "bb.3 (%ir-block.13):"
+        //this.label = /^(?:\d+:(\s+;.+)?|\w+.+:)$/;
+        //this.instruction = /^\s+.+$/;
+    }
+
+    breakdownOutputIntoPassDumps(ir: ResultLine[]) {
+        // break down output by "*** IR Dump After XYZ ***" markers
+        const raw_passes: PassDump[] = [];
+        let pass: PassDump | null = null;
+        let lastWasBlank = false; // skip duplicate blank lines
+        for (const line of ir) {
+            // stop once the machine code passes start, can't handle these yet
+            //if (this.machineCodeDumpHeader.test(line.text)) {
+            //    break;
+            //}
+            const irMatch = line.text.match(this.irDumpHeader);
+            const machineMatch = line.text.match(this.machineCodeDumpHeader);
+            const header = irMatch || machineMatch;
+            if (header) {
+                if (pass !== null) {
+                    raw_passes.push(pass);
+                }
+                pass = {
+                    header: header[1],
+                    // in dump full module mode some headers are annotated for what function (or loop) they operate on
+                    // if we aren't in full module mode or this is a header that isn't function/loop specific this will
+                    // be undefined
+                    affectedFunction: header[2],
+                    machine: !!machineMatch,
+                    lines: [],
+                };
+                lastWasBlank = true; // skip leading newlines after the header
+            } else {
+                if (pass === null) {
+                    throw 'Internal error during breakdownOutput (1)';
+                }
+                if (line.text.trim() === '') {
+                    if (!lastWasBlank) {
+                        pass.lines.push(line);
+                    }
+                    lastWasBlank = true;
+                } else {
+                    pass.lines.push(line);
+                    lastWasBlank = false;
+                }
+            }
+        }
+        if (pass !== null) {
+            raw_passes.push(pass);
+        }
+        return raw_passes;
+    }
+
+    breakdownPassDumpsIntoFunctions(dump: PassDump) {
+        // break up down dumps for each pass into functions (or absence of functions in the case of loop passes)
+        // we have three cases of ir dumps to consider:
+        // - Most passes dump a single function
+        // - Some passes dump every function
+        // - Some loop passes only dump loops - these are challenging to deal with
+        const pass: SplitPassDump = {
+            header: dump.header,
+            machine: dump.machine,
+            functions: {},
+        };
+        let func: {
+            name: string;
+            lines: ResultLine[];
+        } | null = null;
+        for (const line of dump.lines) {
+            const irFnMatch = line.text.match(this.functionDefine);
+            const machineFnMatch = line.text.match(this.machineFunctionBegin);
+            // function define line
+            if (irFnMatch || machineFnMatch) {
+                // if the last function has not been closed...
+                if (func !== null) {
+                    throw 'Internal error during breakdownPass (1)';
+                }
+                func = {
+                    // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+                    name: (irFnMatch || machineFnMatch)![1],
+                    lines: [line], // include the current line
+                };
+            } else if (line.text.startsWith('; Preheader:')) {
+                // loop dump
+                // every line in this dump should be part of the loop, exit condition will be end of the for loop
+                assert(func === null);
+                func = {
+                    name: '<loop>',
+                    lines: [line], // include the current line
+                };
+            } else {
+                // close function
+                if (this.functionEnd.test(line.text.trim())) {
+                    // if not currently in a function
+                    if (func === null) {
+                        throw 'Internal error during breakdownPass (2)';
+                    }
+                    const {name, lines} = func;
+                    lines.push(line); // include the }
+                    // loop dumps can't be terminated with }
+                    if (name === '<loop>') {
+                        throw 'Internal error during breakdownPass (3)';
+                    }
+                    // somehow dumped twice?
+                    if (name in pass.functions) {
+                        throw 'Internal error during breakdownPass (4)';
+                    }
+                    pass.functions[name] = lines;
+                    func = null;
+                } else {
+                    // lines outside a function definition
+                    if (func === null) {
+                        if (line.text.trim() === '') {
+                            // may be a blank line
+                            continue;
+                        } else {
+                            ///console.log('ignoring ------>', line.text);
+                            // ignore
+                            continue;
+                        }
+                    }
+                    func.lines.push(line);
+                }
+            }
+        }
+        // unterminated function, either a loop dump or an error
+        if (func !== null) {
+            if (func.name === '<loop>') {
+                // loop dumps must be alone
+                if (Object.entries(pass.functions).length > 0) {
+                    //console.dir(dump, { depth: 5, maxArrayLength: 100000 });
+                    //console.log(pass.functions);
+                    throw 'Internal error during breakdownPass (5)';
+                }
+                pass.functions[func.name] = func.lines;
+            } else {
+                throw 'Internal error during breakdownPass (6)';
+            }
+        }
+        return pass;
+    }
+
+    breakdownIntoPassDumpsByFunction(passDumps: SplitPassDump[]) {
+        // Currently we have an array of passes with a map of functions altered in each pass
+        // We want to transpose to a map of functions with an array of passes on the function
+        const passDumpsByFunction: Record<string, PassDump[]> = {};
+        // I'm assuming loop dumps should correspond to the previous function dumped
+        let previousFunction: string | null = null;
+        for (const pass of passDumps) {
+            const {header, machine, functions} = pass;
+            const functionEntries = Object.entries(functions);
+            for (const [function_name, lines] of functionEntries) {
+                const name: string | null = function_name === '<loop>' ? previousFunction : function_name;
+                assert(name !== null, 'Loop dump without preceding dump');
+                if (!(name in passDumpsByFunction)) {
+                    passDumpsByFunction[name] = [];
+                }
+                passDumpsByFunction[name].push({
+                    header,
+                    affectedFunction: undefined,
+                    machine,
+                    lines,
+                });
+            }
+            if (functionEntries.length === 0) {
+                // This can happen as a result of "Printing <null> Function"
+                //throw 'Internal error during breakdownOutput (2)';
+            } else if (functionEntries.length === 1) {
+                const name = functionEntries[0][0];
+                if (name !== '<loop>') {
+                    previousFunction = name;
+                }
+            } else {
+                previousFunction = null;
+            }
+        }
+        return passDumpsByFunction;
+    }
+
+    // used for full module dump mode
+    associateFullDumpsWithFunctions(passDumps: PassDump[]) {
+        // Currently we have an array of passes that'll have target annotations
+        const passDumpsByFunction: Record<string, PassDump[]> = {};
+        // First figure out what all the functions are
+        for (const pass of passDumps) {
+            // If it starts with a % it's a loop
+            if (pass.affectedFunction && !pass.affectedFunction.startsWith('%')) {
+                passDumpsByFunction[pass.affectedFunction] = [];
+            }
+        }
+        passDumpsByFunction['<Full Module>'] = [];
+        // I'm assuming loop dumps should correspond to the previous function dumped
+        //const functions = Object.keys(passDumpsByFunction);
+        let previousFunction: string | null = null;
+        for (const pass of passDumps) {
+            const {header, affectedFunction, machine, lines} = pass;
+            if (affectedFunction) {
+                let fn = affectedFunction;
+                if (affectedFunction.startsWith('%')) {
+                    assert(previousFunction !== null);
+                    fn = previousFunction;
+                }
+                assert(fn in passDumpsByFunction);
+                [passDumpsByFunction[fn], passDumpsByFunction['<Full Module>']].map(entry =>
+                    entry.push({
+                        header: `${header} (${fn})`,
+                        affectedFunction: fn,
+                        machine,
+                        lines,
+                    }),
+                );
+                previousFunction = fn;
+            } else {
+                // applies to everything
+                for (const [_, entry] of Object.entries(passDumpsByFunction)) {
+                    entry.push({
+                        header,
+                        affectedFunction: undefined,
+                        machine,
+                        lines,
+                    });
+                }
+                previousFunction = null;
+            }
+        }
+        return passDumpsByFunction;
+    }
+
+    matchPassDumps(passDumpsByFunction: Record<string, PassDump[]>) {
+        // We have all the passes for each function, now we will go through each function and match the before/after
+        // dumps
+        const final_output: LLVMOptPipelineOutput = {};
+        for (const [function_name, passDumps] of Object.entries(passDumpsByFunction)) {
+            // I had a fantastic chunk2 method to iterate the passes in chunks of 2 but I've been foiled by an edge
+            // case: At least the "Delete dead loops" may only print a before dump and no after dump
+            const passes: Pass[] = [];
+            // i incremented appropriately later
+            for (let i = 0; i < passDumps.length; ) {
+                const pass: Pass = {
+                    name: '',
+                    machine: false,
+                    after: [],
+                    before: [],
+                    irChanged: true,
+                };
+                const current_dump = passDumps[i];
+                const next_dump = i < passDumps.length - 1 ? passDumps[i + 1] : null;
+                if (current_dump.header.startsWith('IR Dump After ')) {
+                    // An after dump without a before dump - I don't think this can happen but we'll handle just in case
+                    pass.name = current_dump.header.slice('IR Dump After '.length);
+                    pass.after = current_dump.lines;
+                    i++;
+                } else if (current_dump.header.startsWith('IR Dump Before ')) {
+                    if (next_dump !== null && next_dump.header.startsWith('IR Dump After ')) {
+                        assert(
+                            current_dump.header.slice('IR Dump Before '.length) ===
+                                next_dump.header.slice('IR Dump After '.length),
+                            current_dump.header,
+                            next_dump.header,
+                        );
+                        assert(current_dump.machine === next_dump.machine);
+                        pass.name = current_dump.header.slice('IR Dump Before '.length);
+                        pass.before = current_dump.lines;
+                        pass.after = next_dump.lines;
+                        i += 2;
+                    } else {
+                        // Before with no after - this can happen with Delete dead loops
+                        pass.name = current_dump.header.slice('IR Dump Before '.length);
+                        pass.before = current_dump.lines;
+                        i++;
+                    }
+                } else {
+                    assert(false, 'Unexpected pass header', current_dump.header);
+                }
+                pass.machine = current_dump.machine;
+                // check for equality
+                pass.irChanged = pass.before.map(x => x.text).join('\n') !== pass.after.map(x => x.text).join('\n');
+                passes.push(pass);
+            }
+            //console.dir(passes, {
+            //    depth: 5,
+            //    maxArrayLength: 100000
+            //});
+            assert(!(function_name in final_output), 'xxx');
+            final_output[function_name] = passes;
+        }
+        return final_output;
+    }
+
+    breakdownOutput(ir: ResultLine[], llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) {
+        // break down output by "*** IR Dump After XYZ ***" markers
+        const raw_passes = this.breakdownOutputIntoPassDumps(ir);
+        if (llvmOptPipelineOptions.fullModule) {
+            const passDumpsByFunction = this.associateFullDumpsWithFunctions(raw_passes);
+            // Match before / after pass dumps and we're done
+            return this.matchPassDumps(passDumpsByFunction);
+        } else {
+            // Further break down by functions in each dump
+            const passDumps = raw_passes.map(this.breakdownPassDumpsIntoFunctions.bind(this));
+            // Transform array of passes containing multiple functions into a map from functions to arrays of passes on
+            // those functions
+            const passDumpsByFunction = this.breakdownIntoPassDumpsByFunction(passDumps);
+            // Match before / after pass dumps and we're done
+            return this.matchPassDumps(passDumpsByFunction);
+        }
+    }
+
+    process(ir: ResultLine[], _: ParseFilters, llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) {
+        // Filter a lot of junk before processing
+        const preprocessed_lines = ir
+            .slice(
+                ir.findIndex(line => line.text.match(this.irDumpHeader) || line.text.match(this.machineCodeDumpHeader)),
+            )
+            .filter(line => this.filters.every(re => line.text.match(re) === null)) // apply filters
+            .map(_line => {
+                let line = _line.text;
+                // eslint-disable-next-line no-constant-condition
+                while (true) {
+                    let newLine = line;
+                    for (const re of this.lineFilters) {
+                        newLine = newLine.replace(re, '');
+                    }
+                    if (newLine === line) {
+                        break;
+                    } else {
+                        line = newLine;
+                    }
+                }
+                _line.text = line;
+                return _line;
+            });
+
+        return this.breakdownOutput(preprocessed_lines, llvmOptPipelineOptions);
+    }
+}
author	Jeremy Rifkin <51220084+jeremy-rifkin@users.noreply.github.com>	2022-06-30 19:53:02 -0400
committer	GitHub <noreply@github.com>	2022-07-01 01:53:02 +0200
commit	ed1a4e6b2794e096ecf049a64ed4944733f81b94 (patch)
tree	cc25548f69a2b00ed561ca61ba091ec299739b27 /lib/parsers/llvm-pass-dump-parser.ts
parent	d0a7a22bfa7503547c9d5c35c3d9e24e6dd311ad (diff)
download	compiler-explorer-ed1a4e6b2794e096ecf049a64ed4944733f81b94.tar.gz compiler-explorer-ed1a4e6b2794e096ecf049a64ed4944733f81b94.zip