0

I'm running a nested loop. I added an array in an attempt to speed it up.

When I have 100 rows and 41 columns of data in the "Active" sheet and 1000 rows and 41 columns of data in the "Closed" sheet, it takes about seven minutes to output the data into the "CompSheet".

Sub CompareColumns()

    'Turn off screen updating and automatic calculation
    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual

    Dim i As Integer 'variable for the outer loop
    Dim j As Integer 'variable for the inner loop
    Dim ws As Worksheet 'variable for the sheet CompSheet
    Dim compareLat As Byte 'variable for the column that is being compared
    Dim compareLon As Byte 'variable for the column that is being compared
    Dim compareLatArray As Byte
    Dim compareLonArray As Byte
    Dim uniqueID As String 'variable for the unique identifier
    Dim ActiveSheetRows As Integer
    Dim ClosedSheetRows As Integer
    
    Dim closedArray As Variant ' variable for closed sheet data
    Dim closedArrayRow As Variant
    
    Dim activeArray As Variant ' variable for active sheet data
    Dim activeArrayRow As Variant
    
    Dim dLon As Double
    Dim x As Double
    Dim y As Double
    Dim lat_a As Double
    Dim lat_c As Double
    Dim lon_a As Double
    Dim lon_c As Double
    Dim result As Double
    Dim distance_toggle As Single
    Dim distance As Single

    
    ActiveSheetRows = Worksheets("Active").UsedRange.Rows.Count
    ClosedSheetRows = Worksheets("Closed").UsedRange.Rows.Count
    
    compareLat = 38 'change this variable to switch the column that is being compared
    compareLon = 39 'change this variable to switch the column that is being compared
    compareLatArray = 38 'change this variable to switch the column that is being compared
    compareLonArray = 39 'change this variable to switch the column that is being compared
    
    distance_toggle = 1.5
    
    'Store the data from the "Closed" worksheet into the array
    closedArray = Worksheets("Closed").UsedRange.Value
    
    'Store the data from the "Active" worksheet into the array
    activeArray = Worksheets("Active").UsedRange.Value
    
    'Check if the sheet CompSheet exists, if not create it
    On Error Resume Next
    Set ws = ThisWorkbook.Sheets("CompSheet")
    If ws Is Nothing Then
    
        ThisWorkbook.Sheets.Add(After:=ThisWorkbook.Sheets(ThisWorkbook.Sheets.Count)).Name = "CompSheet"
        
        'copy the header row from the "Closed" worksheet when it first creates the "CompSheet" worksheet
        Worksheets("Closed").Rows(1).Copy _
            Destination:=Worksheets("CompSheet").Range("A1")

        'Add the column header "uniqueID" to the last cell in row 1 of the "CompSheet" worksheet
        Worksheets("CompSheet").Cells(1, Worksheets("CompSheet").UsedRange.Columns.Count + 1).Value = "uniqueID"

        'Add the column header "CompDistance" to the last cell in row 1 of the "CompSheet" worksheet
        Worksheets("CompSheet").Cells(1, Worksheets("CompSheet").UsedRange.Columns.Count + 1).Value = "CompDistance"

    End If
    On Error GoTo 0

    'Loop through all the rows in the "Active" worksheet starting on row 2
    For i = 2 To UBound(activeArray, 1)

        'Loop through the array to look up the data in the "Closed" worksheet
        For j = 2 To UBound(closedArray, 1)
        
            lat_a = activeArray(i, compareLat)
            lat_c = closedArray(j, compareLatArray)
            lon_a = activeArray(i, compareLon)
            lon_c = closedArray(j, compareLonArray)

            'Calculationg for D2R = 0.0174532925199433
            'pi = 4 * Atn(1)
            'D2R = pi / 180#
            
            lat_a = 0.0174532925199433 * lat_a
            lat_c = 0.0174532925199433 * lat_c
            dLon = 0.0174532925199433 * (lon_c - lon_a)

            x = Sin(lat_a) * Sin(lat_c) + Cos(lat_a) * Cos(lat_c) * Cos(dLon)
            y = Sqr((Cos(lat_c) * Sin(dLon)) ^ 2 + (Cos(lat_a) * Sin(lat_c) - Sin(lat_a) * Cos(lat_c) * Cos(dLon)) ^ 2)

            distance = WorksheetFunction.Atan2(x, y) * 3963.19
            
            If distance <= distance_toggle Then
            
                'Copy the row from the Closed worksheet to the CompSheet worksheet in the next available row
                Worksheets("CompSheet").Rows(Worksheets("CompSheet").UsedRange.Rows.Count + 1).Insert
            
                closedArrayRow = Worksheets("Closed").Cells(j, 1).Resize(1, UBound(closedArray, 2))
            
                'Worksheets("CompSheet").Range("B1").Resize(UBound(closedArrayRow, 1), UBound(closedArrayRow, 2)).Value = closedArrayRow
                Worksheets("CompSheet").Rows(Worksheets("CompSheet").UsedRange.Rows.Count).Resize(1, 41).Value = closedArrayRow
            
                
                'Create a uniqueID by combining column 6 from both the Active and Closed worksheets with a space and "&" in between
                uniqueID = activeArray(i, 5) & " " & "&" & " " & closedArray(j, 5)

                'Paste the uniqueID in the next available column of the new row in the CompSheet worksheet
                Worksheets("CompSheet").Cells(Worksheets("CompSheet").UsedRange.Rows.Count, compareLon + 1).Value = uniqueID

                'Paste the distance value in the corresponding column of the new row in the CompSheet worksheet
                Worksheets("CompSheet").Cells(Worksheets("CompSheet").UsedRange.Rows.Count, compareLon + 2).Value = distance
      
            End If
        Next j
    Next i

    'Formatting "CompSheet" Data
    Worksheets("CompSheet").Columns.AutoFit
    Worksheets("CompSheet").Range("AO:AO").NumberFormat = "#,##0.0"
    Worksheets("CompSheet").UsedRange.Font.Bold = False
    Worksheets("CompSheet").Cells(1, 1).EntireRow.Font.Bold = True
    
    'Turn on screen updating and automatic calculation
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic

End Sub

In addition to arrays, I added other code, such as:

Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual

Google drive link for the Excel file. https://drive.google.com/file/d/1GfR5RbWcHFQC-5oY9izDOQWbzZkvBwXi/view?usp=share_link

My code took eight minutes. I'd like to scale this up to a dataset about 500 times this size. Which would take 60 hours to run based on a linear time calculation.

I'm trying to compare real estate listings (properties), properties that are currently listed for sale in the "Active" sheet to ones that are already sold, in the "Closed" sheet.

For every property (row) in the "Active" sheet, I need to check every sold property in the "Closed" sheet based on the distance toggle and if the sold property is within the specified distance (2 miles) then I want to copy the sold listing row from the "Closed" sheet into the "CompSheet" and also paste the Unique ID (both addresses concatenated) and the 'distance' variable, for that comparison.

9
  • When you're not sure where the slowdown is exactly, you can use a trick to profile your code. First, remove the ScreenUpdating stuff GPT suggested, I don't think it's doing much here anyway. Start your code running and press the ctrl-brk key. Answer Debug (not Continue) and you will drop into VBA with the statement currently executing being highlighted. Make a note of it. Then in the Debug window/toolbar press the Continue key to resume execution. Do this several times and build a histogram of your executing code. Commented Jan 27, 2023 at 16:37
  • 1
    Maybe check your UsedRange ranges are not larger than you expect? Sometimes formatting can make them much larger than the actual data. Commented Jan 27, 2023 at 17:55
  • Calling worksheet functions in a loop can be slower than a pure VBA approach. Eg try replacing WorksheetFunction.Atan2 with something like stackoverflow.com/a/11252386/478884 Commented Jan 27, 2023 at 18:02
  • The UsedRanges are the proper size. I can see that in the locals window. I don’t have WorksheetFunction.Atan2 in my code? Commented Jan 27, 2023 at 18:36
  • How many rows are there in Active and Closed? I created a test sheet with 1000 rows in each and worst case, we could be talking 1,000,000 iterations. Commented Jan 27, 2023 at 18:41

4 Answers 4

1

Should take less than 10 seconds

Option Explicit

Sub CompareColumns()

    'change these variable to switch the column that is being compared
    Const compareLat = 38 'AL
    Const compareLon = 39 'AM
    Const compareLatArray = 38 'AL
    Const compareLonArray = 39 'AM
    
    Const distance_toggle = 1.5
    
    Dim wb As Workbook
    Dim wsClosed As Worksheet, wsActive As Worksheet, wsComp As Worksheet
    Dim n As Long, rComp As Long, colsClosed As Long, t0 As Single: t0 = Timer
    
    Set wb = ThisWorkbook
    With wb
        Set wsActive = .Sheets("Active")
        Set wsClosed = .Sheets("Closed")
        
        n = .Sheets.Count
        On Error Resume Next
        Set wsComp = .Sheets("CompSheet")
        On Error GoTo 0

        If wsComp Is Nothing Then
    
            Set wsComp = .Sheets.Add(After:=.Sheets(n))
            With wsComp
                .Name = "CompSheet"
                'copy the header row from the "Closed" worksheet
                'when it first creates the "CompSheet" worksheet
                wsClosed.Rows(1).Copy .Range("A1")

                'Add the column header "uniqueID" and "CompDistance"
                'to the end of row 1 of the "CompSheet" worksheet
                colsClosed = .UsedRange.Columns.Count
                .Cells(1, colsClosed + 1).Value = "uniqueID"
                .Cells(1, colsClosed + 2).Value = "CompDistance"
                
                'Formatting "CompSheet" Data
                .Columns.AutoFit
                .Range("AO:AO").NumberFormat = "#,##0.0"
                .UsedRange.Font.Bold = False
                .Cells(1, 1).EntireRow.Font.Bold = True
             End With
        Else
             colsClosed = wsClosed.UsedRange.Columns.Count
        End If
        rComp = wsComp.UsedRange.Rows.Count + 1
    End With
    
    'Store the data from the "Active" and "Closed"
    'worksheet into the array
    Dim arActive, arClosed
    arActive = wsActive.UsedRange.Value
    arClosed = wsClosed.UsedRange.Value
        
    Dim i As Long, j As Long,  k As Long
    Dim lat_a As Double, lon_a As Double, lat_c As Double, lon_c As Double
    Dim x As Double, y As Double, dLon As Double, distance As Double
    Dim uniqueID As String
    
    'Calculationg for D2R = 0.0174532925199433
    'pi = 4 * Atn(1)
    'D2R = pi / 180#
    Const FACTOR As Double = 1.74532925199433E-02
    
    ' dimension max possible rows
    Dim arComp, z As Long
    z = UBound(arActive) * UBound(arClosed)
    ReDim arComp(1 To z, 1 To colsClosed + 2)
    rComp = 0
    
    'Loop through all the rows in the "Active" worksheet starting on row 2
    For i = 2 To UBound(arActive, 1)
    
        lat_a = arActive(i, compareLat) * FACTOR
        lon_a = arActive(i, compareLon)

        'Loop through the array to look up the data in the "Closed" worksheet
        For j = 2 To UBound(arClosed, 1)
        
            lat_c = arClosed(j, compareLatArray) * FACTOR
            lon_c = arClosed(j, compareLonArray)
            dLon = FACTOR * (lon_c - lon_a)
    
            x = Sin(lat_a) * Sin(lat_c) + Cos(lat_a) * Cos(lat_c) * Cos(dLon)
            y = Sqr((Cos(lat_c) * Sin(dLon)) ^ 2 + (Cos(lat_a) * Sin(lat_c) - Sin(lat_a) * Cos(lat_c) * Cos(dLon)) ^ 2)
    
            distance = WorksheetFunction.Atan2(x, y) * 3963.19
    
            If distance <= distance_toggle Then
                    
                'Create a uniqueID by combining column 6 from
                'both the Active and Closed worksheets
                'with a space and "&" in between
                uniqueID = arActive(i, 5) & " " & "&" & " " & arClosed(j, 5)
                
                'Copy the row from the Closed worksheet to the
                'CompSheet worksheet in the next available row
                'Paste the uniqueID and distance in the next available column
                'of the new row in the CompSheet worksheet
                rComp = rComp + 1
                For k = 1 To colsClosed
                    arComp(rComp, k) = arClosed(j, k)               
                Next
                arComp(rComp, k) = uniqueID
                arComp(rComp, k + 1) = distance
                
            End If
        Next j
    Next i
    
    'Turn off screen updating and automatic calculation
    Application.Calculation = xlCalculationManual
    Application.ScreenUpdating = False

    ' result
    Dim rngComp As Range
    With wsComp
        Set rngComp = .Cells(.UsedRange.Rows.Count + 1, "A")
        Set rngComp = rngComp.Resize(rComp, colsClosed + 2)
        rngComp = arComp
    End With

    'Turn on screen updating and automatic calculation
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
    
    MsgBox "Data written " & rngComp.Address, vbInformation, "Took " & Format(Timer - t0, "0.00 secs")
    
End Sub
Sign up to request clarification or add additional context in comments.

13 Comments

Thanks CDP1802 - that did take only 7 seconds. However, the data copied to the comp sheet is incorrect. I only want to copy the row data from the Closed sheet when the active sheet row has a distance of less than 1.5 miles to the closed sheet row.....I'm comparing real estate properties by distance. Active properties and properties that have sold 'closed'. drive.google.com/file/d/1GfR5RbWcHFQC-5oY9izDOQWbzZkvBwXi/…
@SteveS This is the right idea ( eliminate writing to a sheet inside the for loops) but the for k logic is flawed. It overwrites data each iteration, due to the k+1 and k+2
Hi Chris, how do I fix that for k logic?
@steves Updated code - fixed k loop
@CDP1802 I've update the original post with more detail on what I'm trying to achieve. Your output into 'CompSheet' is just copying every Closed row for every Active row. It should only copy to CompSheet if distance is less than 1.5 miles 'distance toggle'. Which would leave under 40k rows copied to comp sheet. See google drive file for what the output looks like. drive.google.com/file/d/1GfR5RbWcHFQC-5oY9izDOQWbzZkvBwXi/…
|
1

One thing I found so far that is not needed is this:

Worksheets("CompSheet").Rows(Worksheets("CompSheet").UsedRange.Rows.Count + 1).Insert

This looks like you're adding a row to the bottom. You don't have to add rows to the bottom, they're already there - just comment that out and add 1 to your "copy" statement, Rows.Count + 1.

Worksheets("CompSheet").Rows(Worksheets("CompSheet").UsedRange.Rows.Count + 1).Resize(1, 41).Value = closedArrayRow

Comments

1

This should be faster. Compiles, but untested.

  • Use of worksheet references makes your code cleaner.
  • Swapped out the WorksheetFunction call for a faster VBA version.
  • Skipped the Insert when adding data to the comp sheet (as already suggested by Nick).
  • Use Const for fixed values
  • Avoid UsedRange since it can be unreliable/unpredictable
Sub CompareColumns()

    Const NUM_COLS As Long = 39         
    Const ID_COL As Long = 40
    Const DIST_COL As Long = 41
    
    Const COL_ACT_LAT As Long = 38
    Const COL_ACT_LON As Long = 39
    Const COL_CLS_LAT As Long = 38
    Const COL_CLS_LON As Long = 39
    Const DIST_TOGGLE As Double = 1.5
    
    Dim wb As Workbook, wsActive As Worksheet, wsClosed As Worksheet, wsComp As Worksheet
    Dim rngClosed As Range, rngActive As Range
    Dim i As Long, j As Long
    Dim closedArray As Variant, activeArray As Variant
    Dim lat_a As Double, lat_c As Double, lon_a As Double, lon_c As Double
    Dim distance As Double, lastRw As Long, destRw As Range
    
    Set wb = ThisWorkbook
    Set wsActive = wb.Worksheets("Active")
    'if your data has no empty rows or columns
    Set rngActive = wsActive.Range("A1").CurrentRegion.Resize(, NUM_COLS)
    activeArray = rngActive.Value
    
    Set wsClosed = wb.Worksheets("Closed")
    Set rngClosed = wsClosed.Range("A1").CurrentRegion.Resize(, NUM_COLS)
    closedArray = rngClosed.Value
    
    'add the comparison sheet if not already present
    On Error Resume Next 'ignore error if sheet is missing
    Set wsComp = wb.Worksheets("CompSheet")
    On Error GoTo 0      'stop ignoring errors as soon as it's no longer needed....
    If wsComp Is Nothing Then
        Set wsComp = wb.Worksheets.Add(After:=wb.Worksheets(wb.Worksheets.Count))
        wsComp.Name = "CompSheet"
        wsClosed.Range("A1").Resize(1, NUM_COLS).Copy wsComp.Range("A1")
        wsComp.Cells(1, ID_COL).Value = "uniqueID"
        wsComp.Cells(1, DIST_COL).Value = "CompDistance"
        lastRw = 1
    Else
        'find last row with any data
        lastRw = wsComp.Cells.Find(What:="*", SearchOrder:=xlByRows, _
                                   SearchDirection:=xlPrevious).Row
    End If
    Set destRw = wsComp.Rows(lastRw + 1) 'first empty row on comp sheet
    
    For i = 2 To UBound(activeArray, 1) 'loop "active" array

        lat_a = activeArray(i, COL_ACT_LAT) 'you can read these in the outer loop
        lon_a = activeArray(i, COL_ACT_LON)
        
        For j = 2 To UBound(closedArray, 1) 'loop "closed" array
        
            lat_c = closedArray(j, COL_CLS_LAT)
            lon_c = closedArray(j, COL_CLS_LON)
            distance = DistanceCalc(lat_a, lon_a, lat_c, lon_c)
        
            If distance <= DIST_TOGGLE Then
                destRw.Cells(1).Resize(1, NUM_COLS).Value = rngClosed.Rows(j).Value
                destRw.Cells(ID_COL).Value = activeArray(i, 5) & " " & "&" & " " & closedArray(j, 5)
                destRw.Cells(DIST_COL).Value = distance
                Set destRw = destRw.Offset(1, 0)
            End If
        Next j
    Next i

    With wsComp 'Formatting "CompSheet" Data
        .Columns.AutoFit
        .Range("AO:AO").NumberFormat = "#,##0.0"
        .UsedRange.Font.Bold = False
        .Cells(1, 1).EntireRow.Font.Bold = True
    End With
End Sub

'Miles between (latA,lonA) and (latB,lonB)
Function DistanceCalc(latA As Double, lonA As Double, latB As Double, lonB As Double) As Double
    Const RAD_MULT As Double = 1.74532925199433E-02
    Dim dlon As Double, x As Double, y As Double
    latA = latA * RAD_MULT
    latB = latB * RAD_MULT
    dlon = RAD_MULT * (lonB - lonA)
    x = Sin(latA) * Sin(latB) + Cos(latA) * Cos(latB) * Cos(dlon)
    y = Sqr((Cos(latB) * Sin(dlon)) ^ 2 + (Cos(latA) * Sin(latB) - Sin(latA) * Cos(latB) * Cos(dlon)) ^ 2)
    'DistanceCalc = WorksheetFunction.Atan2(x, y) * 3963.19
    DistanceCalc = ArcTan2(x, y) * 3963.19 'VBA version is faster
End Function

'VBA version of WorksheetFunction.Atan2
Function ArcTan2(x As Double, y As Double) As Double
    Const PI As Double = 3.14159265358979
    Const PI_2 As Double = 1.5707963267949
    Select Case x
        Case Is > 0
            ArcTan2 = Atn(y / x)
        Case Is < 0
            ArcTan2 = Atn(y / x) + PI * Sgn(y)
            If y = 0 Then ArcTan2 = ArcTan2 + PI
        Case Is = 0
            ArcTan2 = PI_2 * Sgn(y)
    End Select
End Function

8 Comments

Tim, I like your approach, but there is still a bunch of calculations you can pull from inner loop. Mostly around closedArray, which is accessed over and over. Not so much ActiveArray, whose calcs are only performed once. Suggest defining ClosedArray as a Double Precision array of (NClosedRows,6). Populate in a loop, first two columns are lat & lon from ClosedSheet times Rad_Mult, 3rd and 4th columns are Cos(lat) and Cos(lon), and 5th and 6th are Sin(lat) and Sin(lon). Now you're not calculating ClosedArray trigonometric formulas over and over, and the Variant Type overhead is less
Agreed some optimizations could still be made, but likely at the cost of adding more complexity to the code. As-is, 1M calls to DistanceCalc takes only 1.5 secs, and OP has their current run-time at 7 minutes. Most likely the bulk of that was either UsedRange being larger than expected, or the Insert operation. I' don't have ant representative data to test with, so The OP will need to weigh in on how much speed impreovment is enough.
I've added a link in the OP with the excel file I'm actually working with, and added a better explanation of what I'm trying to achieve.
Tim, your code ran super quickly but the out in the CompSheet was not what I'm looking for.
You can always modify that part of the code... Or explain exactly why it's not what you're looking for and I can try to fix it.
|
0

There are a few basic things you can do to speed code up. The easiest is to disable screen updating and calculations. You can use error handling to ensure they get re-enabled.

Sub MyFasterProcess()
    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual

    On Error GoTo Finally
    Call MyLongRunningProcess()

Finally:
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
    If Err > 0 Then Err.Raise Err
End Sub

Some people like to put that into some helper functions, or even a class to manage the state over several processes.

The most common culprit for long running processes is reading from and writing to cells. It is significantly faster to read an array than it is to read individual cells in the range.

Consider the following:

Sub SlowReadWrite()
    Dim src As Range
    Set src = Range("A1:AA100000")

    Dim c As Range
    For Each c In src
        c.Value = c.Value + 1
    Next c
End Sub

This will take a very, very long time. Now let's do it with an array. Read once. Write once. No need to disable screen updating or set calculation to manual either. This will be just as fast with them on.

Sub FastReadWrite()
    Dim src As Range
    Set src = Range("A1:AA100000")

    'Read once.
    Dim vals() As Variant
    vals = r.Value

    Dim r As Long, c As Long
    For r = 1 To UBound(vals, 1)
        For c = 1 To UBound(vals, 2)
            vals(r, c) = vals(r, c) + 1
        Next c
    Next r

    'Write once.
    src.Value = vals
End Sub

Your code looks like it's still performing read / write actions in the loop which is what is slowing you down.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.