To count tags in XML from folders and subfolders

84 views Asked by At

The below mention code can successfully count the required tags in an XML files and also provides name of file and tag count in excel sheet. I have just one query that currently it only reads the folder individually. However if there are 300 folders in a parent folder i need to select each folder every time. Is there anyway if anyone can amend the code so that if there are 300 folders in a parent folder in read each and every file (XML) in all subfolders. This will be very helpful for me.

I have tried to do it my own but this is beyond my capacity.

Option Explicit

Sub process_folder()

    Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
    Set wb = ThisWorkbook
    Set ws = wb.Sheets(1)
    ws.UsedRange.Clear
    ws.Range("A1:C1") = Array("Source", "<Headline> Tag Count")
    iRow = 1
    
    ' create FSO and regular expression pattern
    Dim FSO As Object, ts As Object, regEx As Object, txt As String
    Set FSO = CreateObject("Scripting.FileSystemObject")
    Set regEx = CreateObject("VBScript.RegExp")
    With regEx
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        .pattern = "<Headline>(.*)</Headline>"
   
        
    End With

    'Opens the folder picker dialog to allow user selection
    Dim myfolder As String, myfile As String, n As Long
    With Application.FileDialog(msoFileDialogFolderPicker)
        .Title = "Please select a folder"
        .Show
        .AllowMultiSelect = False
        If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
            MsgBox "You did not select a folder"
            Exit Sub
        End If
        myfolder = .SelectedItems(1) & "\" 'Assign selected folder to MyFolder
    End With
    
    'Loop through all files in a folder until DIR cannot find anymore
    Application.ScreenUpdating = False
    myfile = Dir(myfolder & "*.xml")
    Do While myfile <> ""
    
        iRow = iRow + 1
        ws.Cells(iRow, 1) = myfile
    
        ' open file and read all lines
        Set ts = FSO.OpenTextFile(myfolder & myfile)
        txt = ts.ReadAll
        ts.Close
                                   
        ' count pattern matches
        Dim m As Object
        If regEx.test(txt) Then
            Set m = regEx.Execute(txt)
            ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
            ws.Cells(iRow, 3) = m.Count
            
        Else
            ws.Cells(iRow, 2) = "No tags"
            ws.Cells(iRow, 3) = 0
        End If

        myfile = Dir 'DIR gets the next file in the folder
    Loop
    
    ' results
    ws.UsedRange.Columns.AutoFit
    Application.ScreenUpdating = True
    

End Sub
2

There are 2 answers

5
CDP1802 On BEST ANSWER

Use Subfolders property of the parent folder object.

Option Explicit

Sub process_folder()

    Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
    Set wb = ThisWorkbook
    Set ws = wb.Sheets(1)
    ws.UsedRange.Clear
    ws.Range("A1:B1") = Array("Source", "<Headline> Tag Count")
    iRow = 1
    
    ' create FSO and regular expression pattern
    Dim fso As Object, ts As Object, regEx As Object, txt As String
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set regEx = CreateObject("VBScript.RegExp")
    With regEx
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        .Pattern = "<Headline>(.*)</Headline>"
    End With

    'Opens the folder picker dialog to allow user selection
    Dim myfolder, myfile As String, n As Long
    Dim parentfolder As String, oParent
   
    With Application.FileDialog(msoFileDialogFolderPicker)
        .Title = "Please select a folder"
        .Show
        .AllowMultiSelect = False
        If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
            MsgBox "You did not select a folder"
            Exit Sub
        End If
        parentfolder = .SelectedItems(1) 'Assign selected folder to ParentFolder
    End With
    Set oParent = fso.getFolder(parentfolder)
    
    ' build collection or files
    Dim colFiles As Collection
    Set colFiles = New Collection
    Call GetFiles(oParent, "xml", colFiles)
    
    'Loop through all files in collection
    Application.ScreenUpdating = False
    For n = 1 To colFiles.Count
        myfile = colFiles(n)
        
        iRow = iRow + 1
        ws.Cells(iRow, 1) = myfile
    
        ' open file and read all lines
        Set ts = fso.OpenTextFile(myfile)
        txt = ts.ReadAll
        ts.Close
                                   
        ' count pattern matches
        Dim m As Object
        If regEx.test(txt) Then
            Set m = regEx.Execute(txt)
            ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
            ws.Cells(iRow, 3) = m.Count
        Else
            ws.Cells(iRow, 2) = "No tags"
            ws.Cells(iRow, 3) = 0
        End If

        ' results
        ws.UsedRange.Columns.AutoFit
    Next
    Application.ScreenUpdating = True
    MsgBox colFiles.Count & " Files process", vbInformation

End Sub

Sub GetFiles(oFolder, ext, ByRef colFiles)

    Dim f As Object
    For Each f In oFolder.Files
        If f.Name Like "*." & ext Then
            colFiles.Add oFolder.Path & "\" & f.Name
        End If
    Next
    
     ' call recursively
    For Each f In oFolder.subfolders
        Call GetFiles(f, ext, colFiles)
    Next
     
End Sub
1
VBasic2008 On

Loop Through All Folders and Subfolders

  • In this post under the title Subfolder Paths to Collection, you can find the CollSubfolderPaths function, which will return the paths of all folders and their subfolders in a collection.
  • In your code, you could utilize it in the following way.
Sub process_folder()
    
    ' Preceding code...
    
    Application.ScreenUpdating = False
    
    ' Return the paths of all folders and subfolders in a collection.
    Dim MyFolders As Collection: Set MyFolders = CollSubfolderPaths(myfolder)
    
    Dim Item As Variant
    
    ' Loop through the items in the collection.
    For Each Item In MyFolders
        ' Get the first file.
        myfile = Dir(Item & "\" & "*.xml")
        'Loop through all files in a folder until DIR cannot find anymore
        Do While myfile <> ""
            ' The same code...
        Loop
    
    Next Item

    ' results
    ws.UsedRange.Columns.AutoFit
    Application.ScreenUpdating = True

End Sub