Azure Log Analytics Workspace only picking up one VM

141 views Asked by At

I'm having trouble with some Terraform I'm writing.

What I'm attempting to do is deploy Terraform which does the following:

  1. Install Azure Monitor Agent on existing VMs (Also added change tracking extension because I wasn't sure if it was needed)
  2. Create a Data Collection Rule and Data Collection Endpoint with destination for the data pointed at my log analytics workspace
  3. Create a Data Collection Rule Association which ties the DCR to the target resource(s) which, in my case, is multiple existing VMs

Running the Terraform, the plan seems to look correct for what I'm attempting as it does successfully run the above mentioned steps for all the VMs. The odd part is, when I run queries in my Log Workspace, I'm only getting a return for 1 VM. So I seem to be getting the desired result from my Terraform, just not for all the VMs I am specifying.

I've checked off the following items while investigating the issue:

  1. Azure Monitor Agent and Change Tracking extensions are successfully installed on all target VMs and provisioning state returns "Succeeded"
  2. All expected VMs are showing up under the Data Collection Rule
  3. Data collection rule association exists for each VM
# Installs the Azure Monitor Agent (AMA) on every VM returned by the
# data.azurerm_resources.vms lookup, one extension instance per VM (keyed by
# list index).
#
# AMA does not take the legacy Log Analytics agent settings
# ("workspaceId" / "workspaceKey"): destinations come from the Data Collection
# Rule associated with the VM, and the agent authenticates with a managed
# identity. The settings below tell the agent which user-assigned identity to
# use, so the workspace shared key no longer has to be written into the
# extension.
#
# NOTE(review): the identity referenced here must actually be attached to each
# target VM, otherwise the agent installs successfully but sends no data.
resource "azurerm_virtual_machine_extension" "ama_windows" {
  for_each = { for i, v in flatten(data.azurerm_resources.vms[*].resources) : i => v }

  name                       = "AzureMonitorWindowsAgent"
  virtual_machine_id         = each.value.id
  publisher                  = "Microsoft.Azure.Monitor"
  type                       = "AzureMonitorWindowsAgent"
  type_handler_version       = "1.0"
  auto_upgrade_minor_version = true

  # Select the user-assigned managed identity by its ARM resource ID.
  settings = jsonencode({
    authentication = {
      managedIdentity = {
        "identifier-name"  = "mi_res_id"
        "identifier-value" = azurerm_user_assigned_identity.AzureMonitoring-UID.id
      }
    }
  })
}


# Installs the Change Tracking and Inventory extension on every VM returned by
# the data.azurerm_resources.vms lookup (keyed by list index).
#
# This extension works on top of the Azure Monitor Agent and is driven by the
# Data Collection Rule associated with the VM, so it takes no workspace
# settings. The legacy "workspaceId" / "workspaceKey" payload (which also passed
# the workspace ARM resource ID where a workspace GUID would be expected, and
# stored the shared key in the extension) has been removed.
resource "azurerm_virtual_machine_extension" "ChangeTracking-Windows" {
  for_each = { for i, v in flatten(data.azurerm_resources.vms[*].resources) : i => v }

  name                       = "ChangeTracking-Windows"
  virtual_machine_id         = each.value.id
  publisher                  = "Microsoft.Azure.ChangeTrackingAndInventory"
  type                       = "ChangeTracking-Windows"
  type_handler_version       = "2.0"
  automatic_upgrade_enabled  = true
  auto_upgrade_minor_version = true

  # The agent must be installed first; the workspace must exist before data flows.
  depends_on = [azurerm_virtual_machine_extension.ama_windows, azurerm_log_analytics_workspace.law]
}


# Automation account carrying both a system-assigned identity and the
# AzureMonitoring-UID user-assigned identity.
resource "azurerm_automation_account" "aa" {
  # Explicit ordering on the resource group module (also implied by module.rg.name).
  depends_on = [module.rg]

  name                = "${module.config.azure_automation_account}001"
  resource_group_name = module.rg.name
  location            = var.aa_location
  sku_name            = "Basic"

  public_network_access_enabled = true

  identity {
    identity_ids = [azurerm_user_assigned_identity.AzureMonitoring-UID.id]
    type         = "SystemAssigned, UserAssigned"
  }
}


# Log Analytics workspace used as the destination for the collected data.
resource "azurerm_log_analytics_workspace" "law" {
  name                = "${module.config.azure_log_analytics_workspace}001"
  resource_group_name = module.rg.name
  location            = var.location

  # Pricing SKU and retention period (in days).
  retention_in_days = 30
  sku               = "PerGB2018"
}



# Log Analytics solution attached to the "law" workspace.
#
# NOTE(review): gallery solutions are normally declared with
# publisher = "Microsoft" and product = "OMSGallery/<SolutionName>", with
# solution_name matching <SolutionName>. Confirm that the publisher/product
# pair below is what the service expects for Change Tracking.
resource "azurerm_log_analytics_solution" "vminsights" {
  solution_name       = "${module.config.azure_log_analytics_solution}001"
  location            = var.location
  resource_group_name = module.rg.name

  workspace_name        = azurerm_log_analytics_workspace.law.name
  workspace_resource_id = azurerm_log_analytics_workspace.law.id

  plan {
    product   = "ChangeTrackingAndInventory"
    publisher = "Microsoft.Azure.Monitor"
  }
}


# Links the automation account to the Log Analytics workspace.
resource "azurerm_log_analytics_linked_service" "laws" {
  workspace_id        = azurerm_log_analytics_workspace.law.id
  read_access_id      = azurerm_automation_account.aa.id
  resource_group_name = module.rg.name
}


# Data Collection Endpoint referenced by the data collection rule.
resource "azurerm_monitor_data_collection_endpoint" "endpoint" {
  name        = "${module.config.azure_monitor_data_collection_rule_endpoint}001"
  description = "connection that Logs ingestion API uses to send collected data to Azure Monitor"

  resource_group_name = module.rg.name
  location            = var.location

  kind                          = "Windows"
  public_network_access_enabled = true
}


# Data Collection Rule that sends performance counters, IIS logs, syslog and
# Change Tracking data to the "law" Log Analytics workspace.
#
# A stream produced by a data source is only delivered if a data_flow block
# routes it to a destination. The Change Tracking streams emitted by the
# "ChangeTracking-Windows" extension data source were not listed in any
# data_flow, so a dedicated data_flow is declared for them below.
resource "azurerm_monitor_data_collection_rule" "default-rule" {
  name                        = "${module.config.azure_monitor_data_collection_rule}001"
  location                    = var.location
  resource_group_name         = module.rg.name
  data_collection_endpoint_id = azurerm_monitor_data_collection_endpoint.endpoint.id
  depends_on                  = [azurerm_monitor_data_collection_endpoint.endpoint]

  destinations {
    log_analytics {
      workspace_resource_id = azurerm_log_analytics_workspace.law.id
      name                  = "log-analytics"
    }
  }

  # Built-in agent streams.
  data_flow {
    streams = [
      "Microsoft-InsightsMetrics",
      "Microsoft-Syslog",
      "Microsoft-Event",
      "Microsoft-Perf",
      "Microsoft-W3CIISLog"
    ]
    destinations = ["log-analytics"]
  }

  # Change Tracking and Inventory streams produced by the extension data source.
  data_flow {
    streams = [
      "Microsoft-ConfigurationChange",
      "Microsoft-ConfigurationChangeV2",
      "Microsoft-ConfigurationData"
    ]
    destinations = ["log-analytics"]
  }

  data_sources {
    extension {
      extension_name = "ChangeTracking-Windows"
      name           = "CTDataSource-Windows"
      streams = [
        "Microsoft-ConfigurationChange",
        "Microsoft-ConfigurationChangeV2",
        "Microsoft-ConfigurationData"
      ]
    }
    syslog {
      facility_names = ["*"]
      log_levels     = ["*"]
      name           = "Syslog"
      streams        = ["Microsoft-Syslog"]
    }
    iis_log {
      streams         = ["Microsoft-W3CIISLog"]
      name            = "iis-Logs"
      log_directories = ["C:\\Logs\\W3SVC1"]
    }
    performance_counter {
      streams                       = ["Microsoft-Perf", "Microsoft-InsightsMetrics"]
      sampling_frequency_in_seconds = 60
      name                          = "Performance-Data"
      counter_specifiers = [
        "\\Processor Information(_Total)\\% Processor Time",
        "Memory(*)\\% Used Memory",
        "Processor(*)\\% Processor Time",
        "ServiceName\\Status"
      ]
    }
  }
}


# Associates the default data collection rule with each VM returned by the
# data.azurerm_resources.vms lookup. The list index is used both as the
# for_each key and as the suffix of the association name.
resource "azurerm_monitor_data_collection_rule_association" "dcr-association" {
  depends_on = [azurerm_monitor_data_collection_rule.default-rule]

  for_each = { for i, v in flatten(data.azurerm_resources.vms[*].resources) : i => v }

  name        = "${module.config.azure_monitor_data_collection_rule_association}00${each.key}"
  description = "Associates the DCR to the resource for monitoring"

  data_collection_rule_id = azurerm_monitor_data_collection_rule.default-rule.id
  target_resource_id      = each.value.id
}

Any insight is greatly appreciated!

2

There are 2 answers

0
TheMilli On BEST ANSWER

Sorry for the delay in response to this thread.

Turns out the specific issue I was having was related to managed identities on the VMs.

To ensure consistency across the infrastructure or any future deployed items I did add the following for my specific case.

# Runs Set-UAI.ps1 once per VM returned by the data.azurerm_resources.vms
# lookup, passing the VM's resource group and name, the identity type and ID,
# and the service principal credentials used by the script.
resource "null_resource" "assign_uai" {
  depends_on = [azurerm_virtual_machine_extension.ama_windows, azuread_service_principal.AzureMonitoring]

  for_each = { for i, v in flatten(data.azurerm_resources.vms[*].resources) : i => v }

  # timestamp() yields a new value on every run, so the provisioner is
  # re-executed on each apply.
  triggers = {
    local-exec = timestamp()
  }

  provisioner "local-exec" {
    interpreter = ["pwsh", "-c"]

    command = "PowerShell -file ../../Scripts/Core/Set-UAI.ps1 -ResourceGroupName ${each.value.resource_group_name} -VM ${each.value.name} -IdentityType ${var.managed_id} -IdentityId ${azurerm_user_assigned_identity.AzureMonitoring-UID.id} -ApplicationId ${data.azuread_application.auth[0].client_id} -ClientSecret ${azuread_application_password.PW[0].value}"
  }
}

This points to the following PS script and passes the appropriate parameters for authenticating to Azure and updating any VM found in the loop.

<#
.SYNOPSIS
    Signs in to Azure as a service principal and sets the managed identity on a VM.

.PARAMETER ResourceGroupName
    Resource group that contains the VM.
.PARAMETER VmName
    Name of the VM to update.
.PARAMETER IdentityType
    Identity type passed to Update-AzVM (for example UserAssigned).
.PARAMETER ApplicationId
    Application (client) ID of the service principal used to sign in.
.PARAMETER ClientSecret
    Client secret of the service principal, as plain text.
.PARAMETER IdentityId
    Resource ID of the user-assigned identity to attach.
.PARAMETER TenantId
    Tenant to sign in to. Optional; defaults to the ARM_TENANT_ID environment variable.
#>
Param
(
    [Parameter(Mandatory=$True)]
    [string]$ResourceGroupName,
    [Parameter(Mandatory=$True)]
    [string]$VmName,
    [Parameter(Mandatory=$True)]
    [string]$IdentityType,
    [Parameter(Mandatory=$True)]
    [string]$ApplicationId,
    [Parameter(Mandatory=$True)]
    [string]$ClientSecret,
    [Parameter(Mandatory=$True)]
    [string]$IdentityId,
    [string]$TenantId = $env:ARM_TENANT_ID
)

if ([string]::IsNullOrWhiteSpace($TenantId)) {
    throw "No tenant ID supplied: pass -TenantId or set the ARM_TENANT_ID environment variable."
}

# PSCredential only accepts a SecureString password, so the plain-text secret
# has to be converted before the credential object can be built.
$SecureSecret = ConvertTo-SecureString -String $ClientSecret -AsPlainText -Force
$Credential = New-Object -TypeName System.Management.Automation.PSCredential -ArgumentList $ApplicationId, $SecureSecret
Connect-AzAccount -ServicePrincipal -TenantId $TenantId -Credential $Credential

$vms = Get-AzVM -ResourceGroupName $ResourceGroupName -Name $VmName

foreach ($vm in $vms) {
    # -VM expects the VM object returned by Get-AzVM, not the VM name.
    Update-AzVM -ResourceGroupName $ResourceGroupName -VM $vm -IdentityType $IdentityType -IdentityId $IdentityId
}

1
Vinay B On

I tried a Terraform configuration for Azure Monitor Agent deployment and data collection on VMs.

If you're only seeing data from one VM in your Log Analytics workspace, despite all VMs showing as associated in the Azure portal, there might be a few areas to troubleshoot that aren't directly related to Terraform syntax or configuration errors:

  1. Data Collection Rule Scope and Filters: Make sure that the scope of your data collection rule covers all the VMs that you need, and avoid any filters that could reduce the amount of data that you collect from your resources.

  2. Agent Configuration and Health: To ensure that the Azure Monitor Agent (AMA) and Change Tracking extension work properly on all VMs, check their configuration and operation status. Don't rely on the provisioning state showing "Succeeded," but inspect the agent's operational logs for any potential problems that could interfere with data collection or transmission.

Given these considerations, let's outline a Terraform configuration that aligns with your requirements. This configuration will include:

  • Azure Monitor Agent installation on VMs.
  • Change Tracking extension installation (optional based on your needs).
  • Data Collection Rule and Endpoint creation, targeting a Log Analytics workspace.
  • Association of the DCR with multiple VMs.

My Configuration:

# AzureRM provider; the features block is declared with no overrides.
provider "azurerm" {
  features {
  }
}


# Names of the VMs; one NIC and one Windows VM are created per entry.
variable "vm_names" {
  type        = list(string)
  description = "A list of virtual machine names to be created and monitored"
}

# Workspace name; also used as the prefix of the DCR and DCE names.
variable "log_analytics_workspace_name" {
  type        = string
  description = "Name of the Log Analytics Workspace."
}

# Name given to the resource group that holds every resource in this configuration.
variable "resource_group_name" {
  type        = string
  description = "Name of the resource group."
}

# Azure region used for the resource group and everything deployed into it.
# An explicit type constraint is declared so that a non-string value is
# rejected at plan time, consistent with the other variables.
variable "location" {
  description = "Azure region for all resources."
  type        = string
  default     = "East US"
}

# Client configuration of the AzureRM provider.
# NOTE(review): not referenced anywhere in the visible configuration.
data "azurerm_client_config" "current" {
}

# Resource group, named and located from the input variables.
resource "azurerm_resource_group" "example" {
  location = var.location
  name     = var.resource_group_name
}

# Storage account (Standard tier, geo-redundant replication).
#
# The resource group name and location are read from the resource group
# resource rather than from the raw variables. That gives Terraform an implicit
# dependency, so the account is only created after the resource group exists.
resource "azurerm_storage_account" "example" {
  name                     = "vktestsbvk"
  resource_group_name      = azurerm_resource_group.example.name
  location                 = azurerm_resource_group.example.location
  account_tier             = "Standard"
  account_replication_type = "GRS"
}


# Log Analytics workspace that the data collection rule writes to.
#
# The resource group name and location are read from the resource group
# resource so that the workspace is only created after the resource group
# exists (the raw variables carry no dependency).
resource "azurerm_log_analytics_workspace" "example" {
  name                = var.log_analytics_workspace_name
  location            = azurerm_resource_group.example.location
  resource_group_name = azurerm_resource_group.example.name
  sku                 = "PerGB2018"
}

# Virtual network for the example VMs.
#
# The resource group name and location are read from the resource group
# resource so that the network is only created after the resource group exists.
resource "azurerm_virtual_network" "example" {
  name                = "example-vnet"
  address_space       = ["10.0.0.0/16"]
  location            = azurerm_resource_group.example.location
  resource_group_name = azurerm_resource_group.example.name
}

# Subnet inside the example virtual network; the VM NICs attach to it.
#
# The resource group name is read from the resource group resource so that the
# subnet carries a dependency on the resource group.
resource "azurerm_subnet" "example" {
  name                 = "example-subnet"
  resource_group_name  = azurerm_resource_group.example.name
  virtual_network_name = azurerm_virtual_network.example.name
  address_prefixes     = ["10.0.1.0/24"]
}

# One network interface per VM name, each with a dynamically allocated private
# IP in the example subnet.
#
# The resource group name and location are read from the resource group
# resource so that the NICs are only created after the resource group exists.
resource "azurerm_network_interface" "example" {
  for_each = toset(var.vm_names)

  name                = "${each.value}-nic"
  location            = azurerm_resource_group.example.location
  resource_group_name = azurerm_resource_group.example.name

  ip_configuration {
    name                          = "internal"
    subnet_id                     = azurerm_subnet.example.id
    private_ip_address_allocation = "Dynamic"
  }
}

# One Windows Server 2019 VM per entry in var.vm_names, attached to the NIC of
# the same key.
#
# Changes from the earlier version:
#   * A system-assigned managed identity is enabled. The Azure Monitor Agent
#     authenticates with the VM's managed identity; without one the extension
#     can install successfully while no data reaches the workspace.
#   * The resource group name and location are read from the resource group
#     resource so that the VMs carry a dependency on it.
resource "azurerm_windows_virtual_machine" "example" {
  for_each = toset(var.vm_names)

  name                  = each.value
  computer_name         = each.value
  resource_group_name   = azurerm_resource_group.example.name
  location              = azurerm_resource_group.example.location
  size                  = "Standard_DS1_v2"
  network_interface_ids = [azurerm_network_interface.example[each.key].id]

  admin_username = "adminuser"
  # NOTE(review): placeholder credential - supply a real secret from a
  # sensitive variable or a secret store before deploying.
  admin_password = "yourpassword"

  # Identity used by the Azure Monitor Agent to authenticate.
  identity {
    type = "SystemAssigned"
  }

  os_disk {
    caching              = "ReadWrite"
    storage_account_type = "Standard_LRS"
  }

  source_image_reference {
    publisher = "MicrosoftWindowsServer"
    offer     = "WindowsServer"
    sku       = "2019-Datacenter"
    version   = "latest"
  }
}

# Installs the Azure Monitor Agent on every example VM.
#
# The agent gets its destinations from the Data Collection Rule associated with
# the VM and authenticates with the VM's managed identity. It does not take the
# legacy Log Analytics agent settings ("workspaceId" / "workspaceKey"), so they
# have been removed; this also keeps the workspace shared key out of the
# extension configuration.
#
# The target VM needs a managed identity for the agent to send data.
resource "azurerm_virtual_machine_extension" "ama" {
  for_each = azurerm_windows_virtual_machine.example

  name                       = "${each.key}-AMA"
  virtual_machine_id         = each.value.id
  publisher                  = "Microsoft.Azure.Monitor"
  type                       = "AzureMonitorWindowsAgent"
  type_handler_version       = "1.0"
  auto_upgrade_minor_version = true
}


# Data Collection Rule that sends syslog, performance counters and Windows
# event log data to the example Log Analytics workspace.
#
# Changes from the earlier version:
#   * "Microsoft-WindowsEvent" is added to the data flow. The windows_event_log
#     data source emits that stream, and a stream that no data_flow routes is
#     never delivered to the destination.
#   * The resource group name and location are read from the resource group
#     resource so that the rule carries a dependency on it.
resource "azurerm_monitor_data_collection_rule" "main" {
  name                        = "${var.log_analytics_workspace_name}-DCR"
  resource_group_name         = azurerm_resource_group.example.name
  location                    = azurerm_resource_group.example.location
  data_collection_endpoint_id = azurerm_monitor_data_collection_endpoint.main.id

  destinations {
    log_analytics {
      workspace_resource_id = azurerm_log_analytics_workspace.example.id
      name                  = "vkLogAnalyticsWorkspace"
    }
  }

  data_flow {
    streams = [
      "Microsoft-InsightsMetrics",
      "Microsoft-Syslog",
      "Microsoft-Perf",
      "Microsoft-WindowsEvent"
    ]
    destinations = ["vkLogAnalyticsWorkspace"]
  }

  data_sources {
    syslog {
      facility_names = ["*"]
      log_levels     = ["*"]
      name           = "example-datasource-syslog"
      streams        = ["Microsoft-Syslog"]
    }

    performance_counter {
      streams                       = ["Microsoft-Perf", "Microsoft-InsightsMetrics"]
      sampling_frequency_in_seconds = 60
      counter_specifiers            = ["Processor(*)\\% Processor Time"]
      name                          = "testvk-datasource-perfcounter"
    }

    windows_event_log {
      streams        = ["Microsoft-WindowsEvent"]
      x_path_queries = ["*![System/Level=1]"]
      name           = "testvk-datasource-wineventlog"
    }
  }
}


# Data Collection Endpoint used by the data collection rule and associated
# with each VM.
#
# The resource group name and location are read from the resource group
# resource so that the endpoint is only created after the resource group exists.
resource "azurerm_monitor_data_collection_endpoint" "main" {
  name                = "${var.log_analytics_workspace_name}-DCE"
  location            = azurerm_resource_group.example.location
  resource_group_name = azurerm_resource_group.example.name
  kind                = "Windows"
}

# Associates the data collection rule with every example VM; each association
# is named after its VM key.
resource "azurerm_monitor_data_collection_rule_association" "example1" {
  for_each = azurerm_windows_virtual_machine.example

  name = "${each.key}-dcra"

  data_collection_rule_id = azurerm_monitor_data_collection_rule.main.id
  target_resource_id      = each.value.id
}

# Associates the data collection endpoint with every example VM. No name is
# set, so the provider's default association name applies.
resource "azurerm_monitor_data_collection_rule_association" "main" {
  for_each = azurerm_windows_virtual_machine.example

  data_collection_endpoint_id = azurerm_monitor_data_collection_endpoint.main.id
  target_resource_id          = each.value.id
}

Deployment Succeeded:

enter image description here

enter image description here

enter image description here

enter image description here