diff options
author | Jeremy Rifkin <51220084+jeremy-rifkin@users.noreply.github.com> | 2022-06-30 19:53:02 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-01 01:53:02 +0200 |
commit | ed1a4e6b2794e096ecf049a64ed4944733f81b94 (patch) | |
tree | cc25548f69a2b00ed561ca61ba091ec299739b27 /lib/parsers/llvm-pass-dump-parser.ts | |
parent | d0a7a22bfa7503547c9d5c35c3d9e24e6dd311ad (diff) | |
download | compiler-explorer-ed1a4e6b2794e096ecf049a64ed4944733f81b94.tar.gz compiler-explorer-ed1a4e6b2794e096ecf049a64ed4944733f81b94.zip |
LLVM Optimization Pass Viewer (#3769)gh-3514
Diffstat (limited to 'lib/parsers/llvm-pass-dump-parser.ts')
-rw-r--r-- | lib/parsers/llvm-pass-dump-parser.ts | 449 |
1 files changed, 449 insertions, 0 deletions
diff --git a/lib/parsers/llvm-pass-dump-parser.ts b/lib/parsers/llvm-pass-dump-parser.ts new file mode 100644 index 000000000..9e8d5f03d --- /dev/null +++ b/lib/parsers/llvm-pass-dump-parser.ts @@ -0,0 +1,449 @@ +// Copyright (c) 2022, Compiler Explorer Authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +import _ from 'underscore'; + +import { + LLVMOptPipelineBackendOptions, + LLVMOptPipelineOutput, + Pass, +} from '../../types/compilation/llvm-opt-pipeline-output.interfaces'; +import {ParseFilters} from '../../types/features/filters.interfaces'; +import {ResultLine} from '../../types/resultline/resultline.interfaces'; +import {logger} from '../logger'; + +// Note(jeremy-rifkin): +// For now this filters out a bunch of metadata we aren't interested in +// Maybe at a later date we'll want to allow user-controllable filters +// It'd be helpful to better display annotations about branch weights +// and parse debug info too at some point. + +// TODO(jeremy-rifkin): Doe we already have an assert utility +function assert(condition: boolean, message?: string, ...args: any[]): asserts condition { + if (!condition) { + const error = message + ? `Assertion error in llvm-print-after-all-parser: ${message}` + : `Assertion error in llvm-print-after-all-parser`; + logger.error(error + ' ' + JSON.stringify(args)); + throw error; + } +} + +// Ir Dump for a pass with raw lines +type PassDump = { + header: string; + affectedFunction: string | undefined; + machine: boolean; + lines: ResultLine[]; +}; +// Ir Dump for a pass with raw lines broken into affected functions (or "<loop>") +type SplitPassDump = { + header: string; + machine: boolean; + functions: Record<string, ResultLine[]>; +}; + +export class LlvmPassDumpParser { + filters: RegExp[]; + lineFilters: RegExp[]; + irDumpHeader: RegExp; + machineCodeDumpHeader: RegExp; + functionDefine: RegExp; + machineFunctionBegin: RegExp; + functionEnd: RegExp; + //label: RegExp; + //instruction: RegExp; + + constructor(compilerProps) { + //this.maxIrLines = 5000; + //if (compilerProps) { + // this.maxIrLines = compilerProps('maxLinesOfAsm', this.maxIrLines); + //} + + this.filters = [ + /^; ModuleID = '.+'$/, // module id line + /^(source_filename|target datalayout|target triple) = ".+"$/, // module metadata + /^; Function Attrs: .+$/, // function attributes + /^\s+call void @llvm.dbg.value.+$/, // dbg calls + /^\s+call void @llvm.dbg.declare.+$/, // dbg calls + /^declare .+$/, // declare directives + /^(!\d+) = (?:distinct )?!DI([A-Za-z]+)\(([^)]+?)\)/, // meta + /^(!\d+) = (?:distinct )?!{.*}/, // meta + /^(![.A-Z_a-z-]+) = (?:distinct )?!{.*}/, // meta + /^attributes #\d+ = { .+ }$/, // attributes directive + ]; + this.lineFilters = [ + /,? ![\dA-Za-z]+((?=( {)?$))/, // debug annotation + /,? #\d+((?=( {)?$))/, // attribute annotation + ]; + + // Ir dump headers look like "*** IR Dump After XYZ ***" + // Machine dump headers look like "# *** IR Dump After XYZ ***:", possibly with a comment or "(function: xyz)" + // or "(loop: %x)" at the end + this.irDumpHeader = /^\*{3} (.+) \*{3}(?:\s+\((?:function: |loop: )(%?[\w$.]+)\))?(?:;.+)?$/; + this.machineCodeDumpHeader = /^# \*{3} (.+) \*{3}:$/; + // Ir dumps are "define T @_Z3fooi(...) . .. {" or "# Machine code for function _Z3fooi: <attributes>" + // Some interesting edge cases found when testing: + // `define internal %"struct.libassert::detail::assert_static_parameters"* @"_ZZ4mainENK3$_0clEv"( + // %class.anon* nonnull dereferenceable(1) %0) #5 align 2 !dbg !2 { ... }` + // `define internal void @__cxx_global_var_init.1() #0 section ".text.startup" {` + this.functionDefine = /^define .+ @([\w.]+|"[^"]+")\(.+$/; + this.machineFunctionBegin = /^# Machine code for function ([\w$.]+):.*$/; + // Functions end with either a closing brace or "# End machine code for function _Z3fooi." + this.functionEnd = /^(?:}|# End machine code for function ([\w$.]+).)$/; + // Either "123:" with a possible comment or something like "bb.3 (%ir-block.13):" + //this.label = /^(?:\d+:(\s+;.+)?|\w+.+:)$/; + //this.instruction = /^\s+.+$/; + } + + breakdownOutputIntoPassDumps(ir: ResultLine[]) { + // break down output by "*** IR Dump After XYZ ***" markers + const raw_passes: PassDump[] = []; + let pass: PassDump | null = null; + let lastWasBlank = false; // skip duplicate blank lines + for (const line of ir) { + // stop once the machine code passes start, can't handle these yet + //if (this.machineCodeDumpHeader.test(line.text)) { + // break; + //} + const irMatch = line.text.match(this.irDumpHeader); + const machineMatch = line.text.match(this.machineCodeDumpHeader); + const header = irMatch || machineMatch; + if (header) { + if (pass !== null) { + raw_passes.push(pass); + } + pass = { + header: header[1], + // in dump full module mode some headers are annotated for what function (or loop) they operate on + // if we aren't in full module mode or this is a header that isn't function/loop specific this will + // be undefined + affectedFunction: header[2], + machine: !!machineMatch, + lines: [], + }; + lastWasBlank = true; // skip leading newlines after the header + } else { + if (pass === null) { + throw 'Internal error during breakdownOutput (1)'; + } + if (line.text.trim() === '') { + if (!lastWasBlank) { + pass.lines.push(line); + } + lastWasBlank = true; + } else { + pass.lines.push(line); + lastWasBlank = false; + } + } + } + if (pass !== null) { + raw_passes.push(pass); + } + return raw_passes; + } + + breakdownPassDumpsIntoFunctions(dump: PassDump) { + // break up down dumps for each pass into functions (or absence of functions in the case of loop passes) + // we have three cases of ir dumps to consider: + // - Most passes dump a single function + // - Some passes dump every function + // - Some loop passes only dump loops - these are challenging to deal with + const pass: SplitPassDump = { + header: dump.header, + machine: dump.machine, + functions: {}, + }; + let func: { + name: string; + lines: ResultLine[]; + } | null = null; + for (const line of dump.lines) { + const irFnMatch = line.text.match(this.functionDefine); + const machineFnMatch = line.text.match(this.machineFunctionBegin); + // function define line + if (irFnMatch || machineFnMatch) { + // if the last function has not been closed... + if (func !== null) { + throw 'Internal error during breakdownPass (1)'; + } + func = { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + name: (irFnMatch || machineFnMatch)![1], + lines: [line], // include the current line + }; + } else if (line.text.startsWith('; Preheader:')) { + // loop dump + // every line in this dump should be part of the loop, exit condition will be end of the for loop + assert(func === null); + func = { + name: '<loop>', + lines: [line], // include the current line + }; + } else { + // close function + if (this.functionEnd.test(line.text.trim())) { + // if not currently in a function + if (func === null) { + throw 'Internal error during breakdownPass (2)'; + } + const {name, lines} = func; + lines.push(line); // include the } + // loop dumps can't be terminated with } + if (name === '<loop>') { + throw 'Internal error during breakdownPass (3)'; + } + // somehow dumped twice? + if (name in pass.functions) { + throw 'Internal error during breakdownPass (4)'; + } + pass.functions[name] = lines; + func = null; + } else { + // lines outside a function definition + if (func === null) { + if (line.text.trim() === '') { + // may be a blank line + continue; + } else { + ///console.log('ignoring ------>', line.text); + // ignore + continue; + } + } + func.lines.push(line); + } + } + } + // unterminated function, either a loop dump or an error + if (func !== null) { + if (func.name === '<loop>') { + // loop dumps must be alone + if (Object.entries(pass.functions).length > 0) { + //console.dir(dump, { depth: 5, maxArrayLength: 100000 }); + //console.log(pass.functions); + throw 'Internal error during breakdownPass (5)'; + } + pass.functions[func.name] = func.lines; + } else { + throw 'Internal error during breakdownPass (6)'; + } + } + return pass; + } + + breakdownIntoPassDumpsByFunction(passDumps: SplitPassDump[]) { + // Currently we have an array of passes with a map of functions altered in each pass + // We want to transpose to a map of functions with an array of passes on the function + const passDumpsByFunction: Record<string, PassDump[]> = {}; + // I'm assuming loop dumps should correspond to the previous function dumped + let previousFunction: string | null = null; + for (const pass of passDumps) { + const {header, machine, functions} = pass; + const functionEntries = Object.entries(functions); + for (const [function_name, lines] of functionEntries) { + const name: string | null = function_name === '<loop>' ? previousFunction : function_name; + assert(name !== null, 'Loop dump without preceding dump'); + if (!(name in passDumpsByFunction)) { + passDumpsByFunction[name] = []; + } + passDumpsByFunction[name].push({ + header, + affectedFunction: undefined, + machine, + lines, + }); + } + if (functionEntries.length === 0) { + // This can happen as a result of "Printing <null> Function" + //throw 'Internal error during breakdownOutput (2)'; + } else if (functionEntries.length === 1) { + const name = functionEntries[0][0]; + if (name !== '<loop>') { + previousFunction = name; + } + } else { + previousFunction = null; + } + } + return passDumpsByFunction; + } + + // used for full module dump mode + associateFullDumpsWithFunctions(passDumps: PassDump[]) { + // Currently we have an array of passes that'll have target annotations + const passDumpsByFunction: Record<string, PassDump[]> = {}; + // First figure out what all the functions are + for (const pass of passDumps) { + // If it starts with a % it's a loop + if (pass.affectedFunction && !pass.affectedFunction.startsWith('%')) { + passDumpsByFunction[pass.affectedFunction] = []; + } + } + passDumpsByFunction['<Full Module>'] = []; + // I'm assuming loop dumps should correspond to the previous function dumped + //const functions = Object.keys(passDumpsByFunction); + let previousFunction: string | null = null; + for (const pass of passDumps) { + const {header, affectedFunction, machine, lines} = pass; + if (affectedFunction) { + let fn = affectedFunction; + if (affectedFunction.startsWith('%')) { + assert(previousFunction !== null); + fn = previousFunction; + } + assert(fn in passDumpsByFunction); + [passDumpsByFunction[fn], passDumpsByFunction['<Full Module>']].map(entry => + entry.push({ + header: `${header} (${fn})`, + affectedFunction: fn, + machine, + lines, + }), + ); + previousFunction = fn; + } else { + // applies to everything + for (const [_, entry] of Object.entries(passDumpsByFunction)) { + entry.push({ + header, + affectedFunction: undefined, + machine, + lines, + }); + } + previousFunction = null; + } + } + return passDumpsByFunction; + } + + matchPassDumps(passDumpsByFunction: Record<string, PassDump[]>) { + // We have all the passes for each function, now we will go through each function and match the before/after + // dumps + const final_output: LLVMOptPipelineOutput = {}; + for (const [function_name, passDumps] of Object.entries(passDumpsByFunction)) { + // I had a fantastic chunk2 method to iterate the passes in chunks of 2 but I've been foiled by an edge + // case: At least the "Delete dead loops" may only print a before dump and no after dump + const passes: Pass[] = []; + // i incremented appropriately later + for (let i = 0; i < passDumps.length; ) { + const pass: Pass = { + name: '', + machine: false, + after: [], + before: [], + irChanged: true, + }; + const current_dump = passDumps[i]; + const next_dump = i < passDumps.length - 1 ? passDumps[i + 1] : null; + if (current_dump.header.startsWith('IR Dump After ')) { + // An after dump without a before dump - I don't think this can happen but we'll handle just in case + pass.name = current_dump.header.slice('IR Dump After '.length); + pass.after = current_dump.lines; + i++; + } else if (current_dump.header.startsWith('IR Dump Before ')) { + if (next_dump !== null && next_dump.header.startsWith('IR Dump After ')) { + assert( + current_dump.header.slice('IR Dump Before '.length) === + next_dump.header.slice('IR Dump After '.length), + current_dump.header, + next_dump.header, + ); + assert(current_dump.machine === next_dump.machine); + pass.name = current_dump.header.slice('IR Dump Before '.length); + pass.before = current_dump.lines; + pass.after = next_dump.lines; + i += 2; + } else { + // Before with no after - this can happen with Delete dead loops + pass.name = current_dump.header.slice('IR Dump Before '.length); + pass.before = current_dump.lines; + i++; + } + } else { + assert(false, 'Unexpected pass header', current_dump.header); + } + pass.machine = current_dump.machine; + // check for equality + pass.irChanged = pass.before.map(x => x.text).join('\n') !== pass.after.map(x => x.text).join('\n'); + passes.push(pass); + } + //console.dir(passes, { + // depth: 5, + // maxArrayLength: 100000 + //}); + assert(!(function_name in final_output), 'xxx'); + final_output[function_name] = passes; + } + return final_output; + } + + breakdownOutput(ir: ResultLine[], llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) { + // break down output by "*** IR Dump After XYZ ***" markers + const raw_passes = this.breakdownOutputIntoPassDumps(ir); + if (llvmOptPipelineOptions.fullModule) { + const passDumpsByFunction = this.associateFullDumpsWithFunctions(raw_passes); + // Match before / after pass dumps and we're done + return this.matchPassDumps(passDumpsByFunction); + } else { + // Further break down by functions in each dump + const passDumps = raw_passes.map(this.breakdownPassDumpsIntoFunctions.bind(this)); + // Transform array of passes containing multiple functions into a map from functions to arrays of passes on + // those functions + const passDumpsByFunction = this.breakdownIntoPassDumpsByFunction(passDumps); + // Match before / after pass dumps and we're done + return this.matchPassDumps(passDumpsByFunction); + } + } + + process(ir: ResultLine[], _: ParseFilters, llvmOptPipelineOptions: LLVMOptPipelineBackendOptions) { + // Filter a lot of junk before processing + const preprocessed_lines = ir + .slice( + ir.findIndex(line => line.text.match(this.irDumpHeader) || line.text.match(this.machineCodeDumpHeader)), + ) + .filter(line => this.filters.every(re => line.text.match(re) === null)) // apply filters + .map(_line => { + let line = _line.text; + // eslint-disable-next-line no-constant-condition + while (true) { + let newLine = line; + for (const re of this.lineFilters) { + newLine = newLine.replace(re, ''); + } + if (newLine === line) { + break; + } else { + line = newLine; + } + } + _line.text = line; + return _line; + }); + + return this.breakdownOutput(preprocessed_lines, llvmOptPipelineOptions); + } +} |