Skip to content

Commit

Permalink
core(tracehouse): make main-thread-tasks creation more forgiving (#9491)
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickhulce authored and paulirish committed Sep 26, 2019
1 parent 39305be commit 16fdf45
Show file tree
Hide file tree
Showing 2 changed files with 475 additions and 70 deletions.
316 changes: 254 additions & 62 deletions lighthouse-core/lib/tracehouse/main-thread-tasks.js
Expand Up @@ -44,104 +44,292 @@ const {taskGroups, taskNameToGroup} = require('./task-groups.js');
class MainThreadTasks {
/**
* @param {LH.TraceEvent} event
* @param {TaskNode} [parent]
* @param {Pick<LH.TraceEvent, 'ph'|'ts'>} [endEvent]
* @return {TaskNode}
*/
static _createNewTaskNode(event, parent) {
static _createNewTaskNode(event, endEvent) {
const isCompleteEvent = event.ph === 'X' && !endEvent;
const isStartEndEventPair = event.ph === 'B' && endEvent && endEvent.ph === 'E';
if (!isCompleteEvent && !isStartEndEventPair) {
throw new Error('Invalid parameters for _createNewTaskNode');
}

const startTime = event.ts;
const endTime = endEvent ? endEvent.ts : event.ts + Number(event.dur || 0);

const newTask = {
event,
startTime: event.ts,
endTime: event.ph === 'X' ? event.ts + Number(event.dur || 0) : NaN,
parent: parent,
children: [],
startTime,
endTime,
duration: endTime - startTime,

// These properties will be filled in later
parent: undefined,
children: [],
attributableURLs: [],
group: taskGroups.other,
duration: NaN,
selfTime: NaN,
};

if (parent) {
parent.children.push(newTask);
}

return newTask;
}

/**
* @param {LH.TraceEvent[]} mainThreadEvents
*
* @param {TaskNode} currentTask
* @param {number} stopTs
* @param {PriorTaskData} priorTaskData
* @param {Array<LH.TraceEvent>} reverseEventsQueue
*/
static _assignAllTimersUntilTs(
currentTask,
stopTs,
priorTaskData,
reverseEventsQueue
) {
while (reverseEventsQueue.length) {
const nextTimerInstallEvent = reverseEventsQueue.pop();
// We're out of events to look at; we're done.
if (!nextTimerInstallEvent) break;

// Timer event is after our current task; push it back on for next time, and we're done.
if (nextTimerInstallEvent.ts > stopTs) {
reverseEventsQueue.push(nextTimerInstallEvent);
break;
}

// Timer event is before the current task, just skip it.
if (nextTimerInstallEvent.ts < currentTask.startTime) {
continue;
}

// We're right where we need to be, point the timerId to our `currentTask`
/** @type {string} */
// @ts-ignore - timerId exists on `TimerInstall` events.
const timerId = nextTimerInstallEvent.args.data.timerId;
priorTaskData.timers.set(timerId, currentTask);
}
}

/**
* This function takes the start and end events from a thread and creates tasks from them.
* We do this by iterating through the start and end event arrays simultaneously. For each start
* event we attempt to find its end event.
*
* Because of this matching of start/end events and the need to be mutating our end events queue,
* we reverse the array to more efficiently `.pop()` them off rather than `.shift()` them off.
* While it's true the worst case runtime here is O(n^2), ~99.999% of the time the reverse loop is O(1)
* because the overwhelmingly common case is that end event for a given start event is simply the very next event in our queue.
*
* @param {LH.TraceEvent[]} taskStartEvents
* @param {LH.TraceEvent[]} taskEndEvents
* @param {number} traceEndTs
* @return {TaskNode[]}
*/
static _createTasksFromEvents(mainThreadEvents, priorTaskData, traceEndTs) {
static _createTasksFromStartAndEndEvents(taskStartEvents, taskEndEvents, traceEndTs) {
/** @type {TaskNode[]} */
const tasks = [];
/** @type {TaskNode|undefined} */
let currentTask;
// Create a reversed copy of the array to avoid copying the rest of the queue on every mutation.
// i.e. pop() is O(1) while shift() is O(n), we take the earliest ts element off the queue *a lot*
// so we'll optimize for having the earliest timestamp events at the end of the array.
const taskEndEventsReverseQueue = taskEndEvents.slice().reverse();

for (let i = 0; i < taskStartEvents.length; i++) {
const taskStartEvent = taskStartEvents[i];
if (taskStartEvent.ph === 'X') {
// Task is a complete X event, we have all the information we need already.
tasks.push(MainThreadTasks._createNewTaskNode(taskStartEvent));
continue;
}

for (const event of mainThreadEvents) {
// Save the timer data, TimerInstall events are instant events `ph === 'I'` so process them first.
if (event.name === 'TimerInstall' && currentTask) {
/** @type {string} */
// @ts-ignore - timerId exists when name is TimerInstall
const timerId = event.args.data.timerId;
priorTaskData.timers.set(timerId, currentTask);
// Task is a B/E event pair, we need to find the matching E event.
let matchedEventIndex = -1;
let matchingNestedEventCount = 0;
let matchingNestedEventIndex = i + 1;

// We loop through the reversed end events queue from back to front because we still want to
// see end events in increasing timestamp order.
// While worst case we will loop through all events, the overwhelmingly common case is that
// the immediate next event is our event of interest which makes this loop typically O(1).
for (let j = taskEndEventsReverseQueue.length - 1; j >= 0; j--) {
const endEvent = taskEndEventsReverseQueue[j];
// We are considering an end event, so we'll count how many nested events we saw along the way.
for (; matchingNestedEventIndex < taskStartEvents.length; matchingNestedEventIndex++) {
if (taskStartEvents[matchingNestedEventIndex].ts >= endEvent.ts) break;

if (taskStartEvents[matchingNestedEventIndex].name === taskStartEvent.name) {
matchingNestedEventCount++;
}
}

// The event doesn't have a matching name, skip it.
if (endEvent.name !== taskStartEvent.name) continue;
// The event has a timestamp that is too early, skip it.
if (endEvent.ts < taskStartEvent.ts) continue;

// The event matches our name and happened after start, the last thing to check is if it was for a nested event.
if (matchingNestedEventCount > 0) {
// If it was for a nested event, decrement our counter and move on.
matchingNestedEventCount--;
continue;
}

// If it wasn't, we found our matching E event! Mark the index and stop the loop.
matchedEventIndex = j;
break;
}

// Only look at X (Complete), B (Begin), and E (End) events as they have most data
if (event.ph !== 'X' && event.ph !== 'B' && event.ph !== 'E') continue;
/** @type {Pick<LH.TraceEvent, 'ph'|'ts'>} */
let taskEndEvent;
if (matchedEventIndex === -1) {
// If we couldn't find an end event, we'll assume it's the end of the trace.
// If this creates invalid parent/child relationships it will be caught in the next step.
taskEndEvent = {ph: 'E', ts: traceEndTs};
} else if (matchedEventIndex === taskEndEventsReverseQueue.length - 1) {
// Use .pop() in the common case where the immediately next event is needed.
// It's ~25x faster, https://jsperf.com/pop-vs-splice.
taskEndEvent = /** @type {LH.TraceEvent} */ (taskEndEventsReverseQueue.pop());
} else {
taskEndEvent = taskEndEventsReverseQueue.splice(matchedEventIndex, 1)[0];
}

// Update currentTask based on the elapsed time.
// The next event may be after currentTask has ended.
tasks.push(MainThreadTasks._createNewTaskNode(taskStartEvent, taskEndEvent));
}

if (taskEndEventsReverseQueue.length) {
throw new Error(
`Fatal trace logic error - ${taskEndEventsReverseQueue.length} unmatched end events`
);
}

return tasks;
}

/**
* This function iterates through the tasks to set the `.parent`/`.children` properties of tasks
* according to their implied nesting structure. If any of these relationships seem impossible based on
* the timestamps, this method will throw.
*
* @param {TaskNode[]} sortedTasks
* @param {LH.TraceEvent[]} timerInstallEvents
* @param {PriorTaskData} priorTaskData
*/
static _createTaskRelationships(sortedTasks, timerInstallEvents, priorTaskData) {
/** @type {TaskNode|undefined} */
let currentTask;
// Create a reversed copy of the array to avoid copying the rest of the queue on every mutation.
const timerInstallEventsReverseQueue = timerInstallEvents.slice().reverse();

for (let i = 0; i < sortedTasks.length; i++) {
const nextTask = sortedTasks[i];

// This inner loop updates what our `currentTask` is at `nextTask.startTime - ε`.
// While `nextTask` starts after our `currentTask`, close out the task, popup to the parent, and repeat.
// If at the end `currentTask` is undefined, then `nextTask` is a toplevel task.
// Otherwise, `nextTask` is a child of `currentTask`.
while (
currentTask &&
Number.isFinite(currentTask.endTime) &&
currentTask.endTime <= event.ts
currentTask.endTime <= nextTask.startTime
) {
MainThreadTasks._assignAllTimersUntilTs(
currentTask,
currentTask.endTime,
priorTaskData,
timerInstallEventsReverseQueue
);
currentTask = currentTask.parent;
}

// If we don't have a current task, start a new one.
if (!currentTask) {
// We can't start a task with an end event
if (event.ph === 'E') {
throw new Error('Fatal trace logic error - unexpected end event');
// If there's a `currentTask`, `nextTask` must be a child.
// Set the `.parent`/`.children` relationships and timer bookkeeping accordingly.
if (currentTask) {
if (nextTask.endTime > currentTask.endTime) {
const timeDelta = nextTask.endTime - currentTask.endTime;
// The child task is taking longer than the parent task, which should be impossible.
// If it's less than 1ms, we'll let it slide by increasing the duration of the parent.
// If it's more, throw an error.
if (timeDelta < 1000) {
currentTask.endTime = nextTask.endTime;
currentTask.duration += timeDelta;
} else {
// If we fell into this error, it's usually because of one of three reasons.
// - We were missing an E event for a child task and we assumed the child ended at the end of the trace.
// - There was slop in the opposite direction (child started 1ms before parent) and the child was assumed to be parent instead.
// - The child timestamp ended more than 1ms after tha parent.
// These have more complicated fixes, so handling separately https://github.com/GoogleChrome/lighthouse/pull/9491#discussion_r327331204.
throw new Error('Fatal trace logic error - child cannot end after parent');
}
}

currentTask = MainThreadTasks._createNewTaskNode(event);
tasks.push(currentTask);

continue;
nextTask.parent = currentTask;
currentTask.children.push(nextTask);
MainThreadTasks._assignAllTimersUntilTs(
currentTask,
nextTask.startTime,
priorTaskData,
timerInstallEventsReverseQueue
);
}

if (event.ph === 'X' || event.ph === 'B') {
// We're starting a nested event, create it as a child and make it the currentTask
const newTask = MainThreadTasks._createNewTaskNode(event, currentTask);
tasks.push(newTask);
currentTask = newTask;
} else {
if (currentTask.event.ph !== 'B') {
throw new Error(
`Fatal trace logic error - expected start event, got ${currentTask.event.ph}`);
}
currentTask = nextTask;
}

// We're ending an event, update the end time and the currentTask to its parent
currentTask.endTime = event.ts;
currentTask = currentTask.parent;
}
if (currentTask) {
MainThreadTasks._assignAllTimersUntilTs(
currentTask,
currentTask.endTime,
priorTaskData,
timerInstallEventsReverseQueue
);
}
}

// Starting from the last and bottom-most task, we finish any tasks that didn't end yet.
while (currentTask && !Number.isFinite(currentTask.endTime)) {
// The last event didn't finish before tracing stopped, use traceEnd timestamp instead.
currentTask.endTime = traceEndTs;
currentTask = currentTask.parent;
/**
* This function takes the raw trace events sorted in increasing timestamp order and outputs connected task nodes.
* To create the task heirarchy we make several passes over the events.
*
* 1. Create three arrays of X/B events, E events, and TimerInstall events.
* 2. Create tasks for each X/B event, throwing if a matching E event cannot be found for a given B.
* 3. Sort the tasks by ↑ startTime, ↓ duration.
* 4. Match each task to its parent, throwing if there is any invalid overlap between tasks.
*
* @param {LH.TraceEvent[]} mainThreadEvents
* @param {PriorTaskData} priorTaskData
* @param {number} traceEndTs
* @return {TaskNode[]}
*/
static _createTasksFromEvents(mainThreadEvents, priorTaskData, traceEndTs) {
/** @type {Array<LH.TraceEvent>} */
const taskStartEvents = [];
/** @type {Array<LH.TraceEvent>} */
const taskEndEvents = [];
/** @type {Array<LH.TraceEvent>} */
const timerInstallEvents = [];

// Phase 1 - Create three arrays of X/B events, E events, and TimerInstall events.
for (const event of mainThreadEvents) {
if (event.ph === 'X' || event.ph === 'B') taskStartEvents.push(event);
if (event.ph === 'E') taskEndEvents.push(event);
if (event.name === 'TimerInstall') timerInstallEvents.push(event);
}

// At this point we expect all tasks to have a finite startTime and endTime.
return tasks;
// Phase 2 - Create tasks for each taskStartEvent.
const tasks = MainThreadTasks._createTasksFromStartAndEndEvents(
taskStartEvents,
taskEndEvents,
traceEndTs
);

// Phase 3 - Sort the tasks by increasing startTime, decreasing duration.
const sortedTasks = tasks.sort(
(taskA, taskB) => taskA.startTime - taskB.startTime || taskB.duration - taskA.duration
);

// Phase 4 - Match each task to its parent.
MainThreadTasks._createTaskRelationships(sortedTasks, timerInstallEvents, priorTaskData);

return sortedTasks;
}

/**
Expand All @@ -157,7 +345,6 @@ class MainThreadTasks {
const childTime = task.children
.map(child => MainThreadTasks._computeRecursiveSelfTime(child, task))
.reduce((sum, child) => sum + child, 0);
task.duration = task.endTime - task.startTime;
task.selfTime = task.duration - childTime;
return task.duration;
}
Expand Down Expand Up @@ -213,7 +400,8 @@ class MainThreadTasks {

task.attributableURLs = attributableURLs;
task.children.forEach(child =>
MainThreadTasks._computeRecursiveAttributableURLs(child, attributableURLs, priorTaskData));
MainThreadTasks._computeRecursiveAttributableURLs(child, attributableURLs, priorTaskData)
);
}

/**
Expand All @@ -227,14 +415,18 @@ class MainThreadTasks {
}

/**
* @param {LH.TraceEvent[]} traceEvents
* @param {LH.TraceEvent[]} mainThreadEvents
* @param {number} traceEndTs
* @return {TaskNode[]}
*/
static getMainThreadTasks(traceEvents, traceEndTs) {
static getMainThreadTasks(mainThreadEvents, traceEndTs) {
const timers = new Map();
const priorTaskData = {timers};
const tasks = MainThreadTasks._createTasksFromEvents(traceEvents, priorTaskData, traceEndTs);
const tasks = MainThreadTasks._createTasksFromEvents(
mainThreadEvents,
priorTaskData,
traceEndTs
);

// Compute the recursive properties we couldn't compute earlier, starting at the toplevel tasks
for (const task of tasks) {
Expand Down

0 comments on commit 16fdf45

Please sign in to comment.