I want to process a ton of JSON objects as quickly as possible. I've done my best to make the processing as concise and efficient as I can, but it still takes over a minute on the data set shown in the output below. How can I make it faster?
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using Newtonsoft.Json;

private void processButton_Click(object sender, EventArgs e)
{
    // For measuring how long the processing takes.
    var stopwatch = Stopwatch.StartNew();

    // Get the IDs of the completed RootObjects.
    // There are no duplicate IDs.
    var completed = new List<string>();
    foreach (var file in Directory.EnumerateFiles("C:\\Completed", "*.json"))
    {
        completed.AddRange(
            JsonConvert.DeserializeObject<List<RootObject>>(File.ReadAllText(file)).Select(o => o.id));
    }
    Console.WriteLine($"completed.Count: {completed.Count}");

    // Get the unfinished RootObjects.
    //
    // 78,198 of the unfinished RootObjects share their ID with another.
    // The duplicates are removed in the next step. (#2)
    //
    // The unfinished RootObjects also contain ALL of the completed RootObjects.
    // The completed RootObjects are ignored in the next step. (#1)
    var unfinished = new List<RootObject>();
    foreach (var file in Directory.EnumerateFiles("C:\\Unfinished", "*.json"))
    {
        unfinished.AddRange(JsonConvert.DeserializeObject<List<RootObject>>(File.ReadAllText(file)));
    }
    Console.WriteLine($"unfinished.Count: {unfinished.Count}");

    var processed =
        unfinished.Where(o => !completed.Contains(o.id))  // (#1) Ignore all completed RootObjects.
                  .GroupBy(o => o.id)                     // (#2) Remove all duplicate RootObjects
                  .Select(objects => objects.First())     //      by keeping the first of each group.
                  .ToList();
    Console.WriteLine($"processed.Count: {processed.Count}");

    stopwatch.Stop();
    Console.WriteLine($"stopwatch.ElapsedMilliseconds: {stopwatch.ElapsedMilliseconds}");
}
// Output:
// completed.Count: 35649
// unfinished.Count: 250315
// processed.Count: 136468
// stopwatch.ElapsedMilliseconds: 75875
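In case it helps, here is a minimal sketch of the RootObject shape the code above assumes. Each *.json file on disk holds a single JSON array of these objects; the only member the processing touches is id, and it must be a string since completed is declared as List<string>. Any other properties are omitted here.

public class RootObject
{
    // The only member the processing uses; a string, because the IDs
    // are collected into a List<string>.
    public string id { get; set; }
}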
If you need any further information, let me know.