UberProf performance improvements

time to read 15 min | 2870 words

I mentioned before that I run into some performance problems, I thought it would be interesting to see how I solved them.

The underlying theme was finding O(N) operations are eradicating them. In almost all the cases, the code is more complex, but significantly faster

Old code New code
public static class TimestampExtensions
{
    public static void AddInPlace<T>(this IList<T> self, T model)
        where T : IHaveTimestamp
    {
        int index = self.Count;
        for (var i = 0; i < self.Count; i++)
        {
            if (self[i].Timestamp > model.Timestamp)
            {
                index = i;
                break;
            }
        }
        self.Insert(index, model);
    }    
}
public static class TimestampExtensions
{
    public static void AddInPlace(this IList<SqlStatement> self, SqlStatement model)
    {
        var timestamp = model.Timestamp;
        var index = self.Count;
        if (index > 0 && timestamp >= self[index - 1].Timestamp)
        {
            self.Add(model);
            return;
        }
        for (var i = 0; i < self.Count; i++)
        {
            if (self[i].Timestamp < timestamp) 
                continue;
            index = i;
            break;
        }
        self.Insert(index, model);
    }    
}

Here we optimize for the common case where we add the statement to the end of the list. We also changed from generic method with interface constraint to the actual type used. That allows the JIT to apply additional optimizations. That method alone was responsible for almost 10 minutes in my test runs.

Old code New code
var statement = session.Statements
    .Reverse()
    .Where(x => x.SessionId.Timestamp <= queryDuration.SessionId.Timestamp)
    .FirstOrDefault();
var statement = session.Statements.LastOrDefault();

session.Statements is already ordered by the statements timestamps. The code perform a slightly differently here, and it is less robust to async message streaming (when events that happened sequentially arrive in different order than they occured), but that is rare enough that I feel that the performance boost from this is acceptable.

Old code New code
public override void AfterAttachingToSession(
SessionInformation sessionInformation, SqlStatement statement) { var isSelectStatement = statement.IsSelectStatement; if (isSelectStatement == false) return; if (statement.RawSql == mySqlSelectLastIdentity) return; int countOfStatementsWithIdenticalSql = sessionInformation.Statements .Count(x => x.RawSql == statement.RawSql); if (countOfStatementsWithIdenticalSql <
configuration.MaxRepeatedStatementsForSelectNPlusOne) return; statement.AcceptAlert(AlertInformation.SelectNPlusOne()); }
public int GetCountOfStatementsByRawSql(string rawSql)
{
    int value;
    countsByRawSql.TryGetValue(rawSql, out value);
    return value;
}

public void AddStatement(SqlStatement statement)
{
    int count;
    if (countsByRawSql.TryGetValue(statement.RawSql, out count) == false)
        count = 0;
    countsByRawSql[statement.RawSql] = count + 1;
    
    statements.AddInPlace(statement);
    StatementsChanged();
}


public override void AfterAttachingToSession(
    SessionInformation sessionInformation, SqlStatement statement)
{
    var isSelectStatement = statement.IsSelectStatement;
    if (isSelectStatement == false)
        return;

    if (statement.RawSql == mySqlSelectLastIdentity)
        return;

    if (sessionInformation.GetCountOfStatementsByRawSql(statement.RawSql) < 
        configuration.MaxRepeatedStatementsForSelectNPlusOne)
        return;
    statement.AcceptAlert(AlertInformation.SelectNPlusOne());
}

On the left, we have the old code, that did the calculations manually. On the right, the two top methods will do the calculation up front, and then it is just a matter of reading it in the appropriate times.

Old code New code
protected void ClearStackTraceFromInfrastructureFrameworks(StackTraceInfo stackTrace)
{
    if (configuration.InfrastructureNamespaces == null)
        return;
    
    stackTrace.Frames =stackTrace.Frames.Where(
        frame =>
        {
            var isInfrastructureNamespace = 
                configuration.InfrastructureNamespaces.Any(
                ns => frame.Namespace != null && frame.Namespace.StartsWith(ns)
                );

            return isInfrastructureNamespace == false ||
                   configuration.NonInfrastructureNamespaces.Any(
                    ns => frame.Namespace != null && frame.Namespace.StartsWith(ns)
                    );
        }).ToArray();
}
public StackTraceProcessor(ProfilerConfiguration configuration)
{
    this.configuration = configuration;
    var sb = new StringBuilder();
    foreach (var infrastructureNamespace in 
        configuration.InfrastructureNamespaces ?? new List<string>())
    {
        sb.Append("(^")
            .Append(Regex.Escape(infrastructureNamespace))
            .Append(")|");
    }
    sb.Length = sb.Length - 1;
    namespacesToClear = new Regex(sb.ToString(),
        RegexOptions.Compiled | RegexOptions.IgnoreCase);
    sb.Length = 0;

    foreach (var infrastructureNamespace in 
        configuration.NonInfrastructureNamespaces ?? new List<string>())
    {
        sb.Append("(^")
            .Append(Regex.Escape(infrastructureNamespace))
            .Append(")|");
    }
    sb.Length = sb.Length - 1;
    namespacesToKeep = new Regex(sb.ToString(), 
        RegexOptions.Compiled | RegexOptions.IgnoreCase);
}

protected void ClearStackTraceFromInfrastructureFrameworks(StackTraceInfo stackTrace)
{
    if (configuration.InfrastructureNamespaces == null)
        return;

    stackTrace.Frames =
        stackTrace.Frames.Where(IsValidStackFrameMethod).ToArray();
}

private bool IsValidStackFrameMethod(StackTraceFrame frame)
{
    if (frame.Namespace == null)
        return true;
    if(namespacesToClear.IsMatch(frame.Namespace) == false)
        return true;
    return namespacesToKeep.IsMatch(frame.Namespace);
}

Interestingly enough, this code produced much better runtime performance, simply because, while it is more complex, the number of iterations that are happening are so much smaller.

Overall, the change result in order of magnitude or two difference for processing messages in the profiler pipeline.