// Copyright (c) 2012-2017, Najjar Chedy // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. var _ = require('underscore-node'), logger = require('./logger').logger; var InstructionType_jmp = 0; var InstructionType_conditionalJmpInst = 1; var InstructionType_notRetInst = 2; var InstructionType_retInst = 3; var isFunctionName = function (x) { return x.text.trim().indexOf('.') !== 0 && !x.text.includes(' for ');// deal with typeinfo name for, typeinfo for, vtable for }; // this mess is intended to be ported to another file. function separateCodeFromData(asmArr) { var jmpLabelRegex = /\.L{1}\d+:/; var isCode = function (x) { return x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || isFunctionName(x)); }; return _.chain(asmArr) .map(_.clone) .filter(isCode) .value(); } var gccX86 = { filterData: separateCodeFromData, isFunctionEnd: function (x) { return ((x[0] !== ' ') && (x[0] !== '.') && (x.indexOf(':') !== -1)); }, isBasicBlockEnd: function (inst, prevInst) { return inst[0] === '.' || prevInst.includes(' ret'); }, getInstructionType: function (inst) { if (inst.includes('jmp')) return InstructionType_jmp; else if (inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst; else if (!inst.includes(' ret')) return InstructionType_notRetInst; else return InstructionType_retInst; }, extractNodeName: function (inst) { var name = inst.match(/\.L{1}\d+/); return name + ':'; }, isJmpInstruction: function (x) { var trimed = x.trim(); return ( trimed[0] === 'j'); } }; function separateCodeFromData_(asmArr) { var jmpLabelRegex = /\.LBB\d+_\d+:/; var isCode = function (x) { return x && x.text && (x.source !== null || jmpLabelRegex.test(x.text) || isFunctionName(x)); }; var removeComments = function (x) { var pos = x.text.indexOf('#'); if (pos !== -1) x.text = x.text.substring(0, pos - 1); return x; }; return _.chain(asmArr) .map(_.clone) .filter(isCode) .map(removeComments) .value(); } var clangX86 = { filterData: separateCodeFromData_, isFunctionEnd: function (x) { return ((x[0] !== ' ') && (x[0] !== '.') && (x.indexOf(':') !== -1)); }, isBasicBlockEnd: function (inst, prevInst) { return inst[0] === '.' || prevInst.includes(' ret'); }, getInstructionType: function (inst) { if (inst.includes('jmp')) return InstructionType_jmp; else if (inst.trim()[0] === 'j') return InstructionType_conditionalJmpInst; else if (!inst.includes(' ret')) return InstructionType_notRetInst; else return InstructionType_retInst; }, extractNodeName: function (inst) { var name = inst.match(/\.LBB\d+_\d+/); return name + ':'; }, isJmpInstruction: function (x) { var trimed = x.trim(); return ( trimed[0] === 'j'); } }; function ControlFlowGraph(compiler) { if (compiler.includes('clang')) this.rules = clangX86; else this.rules = gccX86; } ControlFlowGraph.prototype.splitToFunctions = function (asmArr, isEnd) { if (asmArr.length === 0) return []; var result = []; var first = 1; var last = asmArr.length; var fnRange = {start: 0, end: null}; while (first !== last) { if (isEnd(asmArr[first].text)) { fnRange.end = first; result.push(_.clone(fnRange)); fnRange.start = first; } ++first; } fnRange.end = last; result.push(_.clone(fnRange)); return result; }; ControlFlowGraph.prototype.splitToBasicBlocks = function (asmArr, range, isEnd, isJmp) { var first = range.start; var last = range.end; if (first === last) return []; var functionName = asmArr[first].text; ++first; var rangeBb = {nameId: functionName.substr(0, 50), start: first, end: null, actionPos: []}; var result = []; var newRangeWith = function (oldRange, nameId, start) { return {nameId: nameId, start: start, actionPos: [], end: oldRange.end}; }; while (first < last) { var inst = asmArr[first].text; if (isEnd(inst, asmArr[first - 1] ? asmArr[first - 1].text : '')) { rangeBb.end = first; result.push(_.clone(rangeBb)); //inst is expected to be .L*: where * in 1,2,... rangeBb = newRangeWith(rangeBb, inst, first + 1); } else if (isJmp(inst)) { rangeBb.actionPos.push(first); } ++first; } rangeBb.end = last; result.push(_.clone(rangeBb)); return result; }; ControlFlowGraph.prototype.splitToCanonicalBasicBlock = function (basicBlock) { var actionPos = basicBlock.actionPos; var actPosSz = actionPos.length; if (actionPos[actPosSz - 1] + 1 === basicBlock.end) { --actPosSz; } if (actPosSz === 0) return [{ nameId: basicBlock.nameId, start: basicBlock.start, end: basicBlock.end }]; else if (actPosSz === 1) return [ {nameId: basicBlock.nameId, start: basicBlock.start, end: actionPos[0] + 1}, {nameId: basicBlock.nameId + '@' + (actionPos[0] + 1), start: actionPos[0] + 1, end: basicBlock.end} ]; else { var first = 0; var last = actPosSz; var blockName = basicBlock.nameId; var tmp = {nameId: blockName, start: basicBlock.start, end: actionPos[first] + 1}; var result = []; result.push(_.clone(tmp)); while (first !== last - 1) { tmp.nameId = blockName + '@' + (actionPos[first] + 1); tmp.start = actionPos[first] + 1; ++first; tmp.end = actionPos[first] + 1; result.push(_.clone(tmp)); } tmp = {nameId: blockName + '@' + (actionPos[first] + 1), start: actionPos[first] + 1, end: basicBlock.end}; result.push(_.clone(tmp)); return result; } }; ControlFlowGraph.prototype.concatInstructions = function (asmArr, first, last) { return _.chain(asmArr.slice(first, last)) .map(function (x) { return x.text.substr(0, 50); }) .value() .join('\n'); }; ControlFlowGraph.prototype.makeNodes = function (asmArr, arrOfCanonicalBasicBlock) { var node = {}; var nodes = []; _.each(arrOfCanonicalBasicBlock, _.bind(function (elm) { logger.debug('node name:'); logger.debug(elm.nameId); node.id = elm.nameId; node.label = elm.nameId + ((elm.nameId.indexOf(':') !== -1) ? '' : ':') + '\n' + this.concatInstructions(asmArr, elm.start, elm.end); node.color = '#99ccff'; node.shape = 'box'; nodes.push(_.clone(node)); }, this)); return nodes; }; ControlFlowGraph.prototype.makeEdges = function (asmArr, arrOfCanonicalBasicBlock, instructionType, extractNodeNameFromInstruction) { var edge = {}; var edges = []; var setEdge = function (edge, sourceNode, targetNode, color) { edge.from = sourceNode; edge.to = targetNode; edge.arrows = 'to'; edge.color = color; }; var isBasicBlockEnd = this.rules.isBasicBlockEnd; var hasName = function (asmArr, cbb) { var asm = asmArr[cbb.end]; return asm ? isBasicBlockEnd(asm.text, '') : false; }; var generateName = function (name, suffix) { var pos = name.indexOf('@'); if (pos === -1) return name + '@' + suffix; return name.substring(0, pos + 1) + suffix; }; // note: x.end-1 possible value: jmp .L*, {jne,je,jg,...} .L*, ret/rep ret, call and any other instruction that doesn't change control flow _.each(arrOfCanonicalBasicBlock, function (x) { var targetNode; var lastInst = asmArr[x.end - 1].text; switch (instructionType(lastInst)) { case InstructionType_jmp: { //we have to deal only with jmp destination, jmp instruction are always taken. //edge from jump inst logger.debug('jmp'); targetNode = extractNodeNameFromInstruction(lastInst); setEdge(edge, x.nameId, targetNode, 'blue'); edges.push(_.clone(edge)); logger.debug(edge); } break; case InstructionType_conditionalJmpInst: { logger.debug('condit jmp'); //deal with : branch taken, branch not taken targetNode = extractNodeNameFromInstruction(lastInst); setEdge(edge, x.nameId, targetNode, 'green'); edges.push(_.clone(edge)); logger.debug(edge); targetNode = hasName(asmArr, x) ? asmArr[x.end].text : generateName(x.nameId, x.end); setEdge(edge, x.nameId, targetNode, 'red'); edges.push(_.clone(edge)); logger.debug(edge); } break; case InstructionType_notRetInst: { //precondition: lastInst is not last instruction in asmArr (but it is in canonical basic block) //note : asmArr[x.end] expected to be .L*:(name of a basic block) // this .L*: has to be exactly after the last instruction in the current canocial basic block if (asmArr[x.end]) { targetNode = asmArr[x.end].text; setEdge(edge, x.nameId, targetNode, 'grey'); edges.push(_.clone(edge)); logger.debug('not ret inst'); logger.debug(edge); } else { logger.debug(x); } } break; case InstructionType_retInst: logger.debug('expect ret instruction or it\'s variants(rep ret): ' + lastInst); break; } }); logger.debug(edges); return edges; }; ControlFlowGraph.prototype.generateCfgStructure = function (asmArr) { var code = this.rules.filterData(asmArr); var funcs = this.splitToFunctions(code, this.rules.isFunctionEnd); if (!funcs.length) { return funcs; } var result = {}; _.each(funcs, _.bind(function (rng) { var basicBlocks = this.splitToBasicBlocks(code, rng, this.rules.isBasicBlockEnd, this.rules.isJmpInstruction); var arrOfCanonicalBasicBlock = []; _.each(basicBlocks, _.bind(function (elm) { var tmp = this.splitToCanonicalBasicBlock(elm); arrOfCanonicalBasicBlock = arrOfCanonicalBasicBlock.concat(tmp); }, this)); result[code[rng.start].text] = { nodes: this.makeNodes(code, arrOfCanonicalBasicBlock), edges: this.makeEdges(code, arrOfCanonicalBasicBlock, this.rules.getInstructionType, this.rules.extractNodeName) }; }, this)); return result; }; module.exports.ControlFlowGraph = ControlFlowGraph;