diff --git a/bc-export.ps1 b/bc-export.ps1
index dcda716..79eac4d 100755
--- a/bc-export.ps1
+++ b/bc-export.ps1
@@ -36,8 +36,9 @@ $entities = @(
     "countriesRegions"
 )
 
-# Document entities with line items - fetched with $expand to include lines
+# Document entities with line items
 # Lines cannot be queried standalone; they require a parent document ID
+# We fetch documents in small batches with $expand to include lines
 $documentEntities = @{
     "salesInvoices" = "salesInvoiceLines"
     "salesOrders" = "salesOrderLines"
@@ -46,6 +47,13 @@ $documentEntities = @{
     "purchaseOrders" = "purchaseOrderLines"
 }
 
+# Page size for $expand queries (sent as odata.maxpagesize) - kept small to avoid timeouts/OOM on large datasets
+$expandPageSize = 50
+
+# Token management
+$script:currentToken = $null
+$script:tokenExpiry = [datetime]::MinValue
+
 function Write-Log {
     param([string]$Message, [string]$Level = "INFO")
     $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
@@ -53,27 +61,24 @@ function Write-Log {
 }
 
 function Get-AzureADToken {
-    param(
-        [string]$TenantId,
-        [string]$ClientId,
-        [string]$ClientSecret
-    )
-
     Write-Log "Authenticating to Azure AD..."
 
-    $tokenUrl = "https://login.microsoftonline.com/$TenantId/oauth2/v2.0/token"
+    $tokenUrl = "https://login.microsoftonline.com/$tenantId/oauth2/v2.0/token"
 
     $body = @{
-        client_id = $ClientId
-        client_secret = $ClientSecret
+        client_id = $clientId
+        client_secret = $clientSecret
         scope = "https://api.businesscentral.dynamics.com/.default"
         grant_type = "client_credentials"
     }
 
     try {
         $response = Invoke-RestMethod -Uri $tokenUrl -Method Post -Body $body -ContentType "application/x-www-form-urlencoded"
-        Write-Log "Successfully authenticated to Azure AD"
-        return $response.access_token
+        $script:currentToken = $response.access_token
+        # Refresh 5 minutes before actual expiry (tokens typically last 60-90 min)
+        $script:tokenExpiry = (Get-Date).AddSeconds($response.expires_in - 300)
+        Write-Log "Successfully authenticated to Azure AD (expires in $($response.expires_in)s)"
+        return $script:currentToken
     }
     catch {
         Write-Log "Failed to authenticate: $_" "ERROR"
@@ -81,14 +86,23 @@ function Get-AzureADToken {
     }
 }
 
+# Returns the cached bearer token, re-authenticating first when it is missing or past its refresh time
+function Get-ValidToken {
+    if ($null -eq $script:currentToken -or (Get-Date) -ge $script:tokenExpiry) {
+        Write-Log "Token expired or missing, refreshing..."
+        Get-AzureADToken | Out-Null
+    }
+    return $script:currentToken
+}
+
 function Get-BCData {
     param(
-        [string]$Token,
         [string]$Url
     )
 
+    $token = Get-ValidToken
     $headers = @{
-        "Authorization" = "Bearer $Token"
+        "Authorization" = "Bearer $token"
         "Accept" = "application/json"
     }
 
@@ -97,7 +111,9 @@ function Get-BCData {
     $currentUrl = $Url
     while ($currentUrl) {
+        # Re-check the token on every page so a long multi-page export outlives token expiry
+        $headers["Authorization"] = "Bearer $(Get-ValidToken)"
         try {
-            $response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers
+            $response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300
         }
         catch {
             Write-Log "API request failed for $currentUrl : $_" "ERROR"
@@ -116,18 +132,15 @@ function Get-BCData {
 }
 
 function Get-Companies {
-    param([string]$Token)
-
     Write-Log "Fetching companies..."
     $companiesUrl = "$baseUrl/companies"
-    $companies = Get-BCData -Token $Token -Url $companiesUrl
+    $companies = Get-BCData -Url $companiesUrl
     Write-Log "Found $($companies.Count) company/companies"
     return $companies
 }
 
 function Export-EntityData {
     param(
-        [string]$Token,
         [string]$CompanyId,
         [string]$CompanyName,
         [string]$EntityName,
@@ -139,7 +152,7 @@ function Export-EntityData {
     Write-Log " Exporting $EntityName..."
 
     try {
-        $data = Get-BCData -Token $Token -Url $entityUrl
+        $data = Get-BCData -Url $entityUrl
         $count = 0
         if ($data) { $count = $data.Count }
 
@@ -151,7 +164,6 @@ function Export-EntityData {
     }
     catch {
         Write-Log " Failed to export ${EntityName}: $_" "WARN"
-        # Write empty array so downstream knows it was attempted
         $outputFile = Join-Path $OutputDir "$EntityName.json"
         "[]" | Out-File -FilePath $outputFile -Encoding utf8
         return 0
@@ -160,7 +172,6 @@ function Export-EntityData {
 
 function Export-DocumentWithLines {
     param(
-        [string]$Token,
         [string]$CompanyId,
         [string]$CompanyName,
         [string]$DocumentEntity,
@@ -168,34 +179,67 @@ function Export-DocumentWithLines {
         [string]$OutputDir
     )
 
+    # Page through the documents with $expand, requesting small pages through the
+    # "Prefer: odata.maxpagesize" header. $top is deliberately not used: it caps the
+    # TOTAL number of documents returned, so the export would stop after one batch.
     $entityUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity" + '?$expand=' + $LineEntity
 
-    Write-Log " Exporting $DocumentEntity (with $LineEntity)..."
+    Write-Log " Exporting $DocumentEntity (with $LineEntity, batch size $expandPageSize)..."
+
+    # .NET file APIs resolve relative paths against the process working directory,
+    # not the PowerShell location, so hand them a fully qualified path
+    $outputFile = Join-Path $OutputDir "$DocumentEntity.jsonl"
+    $outputFile = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($outputFile)
+    $docCount = 0
+    $lineCount = 0
+    $currentUrl = $entityUrl
 
     try {
-        $data = Get-BCData -Token $Token -Url $entityUrl
-        $docCount = 0
-        $lineCount = 0
-        if ($data) {
-            $docCount = $data.Count
-            foreach ($doc in $data) {
-                if ($doc.$LineEntity) {
-                    $lineCount += $doc.$LineEntity.Count
+        # Clear output file
+        [System.IO.File]::WriteAllText($outputFile, "")
+
+        while ($currentUrl) {
+            $token = Get-ValidToken
+            $headers = @{
+                "Authorization" = "Bearer $token"
+                "Accept" = "application/json"
+                "Prefer" = "odata.maxpagesize=$expandPageSize"
+            }
+
+            try {
+                $response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300
+            }
+            catch {
+                Write-Log " API request failed at doc #$docCount : $_" "ERROR"
+                throw
+            }
+
+            if ($response.value) {
+                foreach ($doc in $response.value) {
+                    $lines = 0
+                    if ($doc.$LineEntity) {
+                        $lines = $doc.$LineEntity.Count
+                    }
+                    $lineCount += $lines
+                    $docCount++
+
+                    # Write each document immediately to disk (JSONL: one JSON object per line)
+                    $jsonLine = $doc | ConvertTo-Json -Depth 10 -Compress
+                    [System.IO.File]::AppendAllText($outputFile, $jsonLine + "`n")
                 }
             }
+
+            Write-Log " Progress: $docCount documents, $lineCount lines"
+
+            # Next page
+            $currentUrl = $response.'@odata.nextLink'
         }
 
-        # Save the documents (with lines embedded)
-        $outputFile = Join-Path $OutputDir "$DocumentEntity.json"
-        $data | ConvertTo-Json -Depth 10 | Out-File -FilePath $outputFile -Encoding utf8
-
-        Write-Log " $DocumentEntity : $docCount documents, $lineCount lines"
+        Write-Log " $DocumentEntity : $docCount documents, $lineCount lines (complete)"
         return ($docCount + $lineCount)
     }
     catch {
-        Write-Log " Failed to export ${DocumentEntity} with lines: $_" "WARN"
-        $outputFile = Join-Path $OutputDir "$DocumentEntity.json"
-        "[]" | Out-File -FilePath $outputFile -Encoding utf8
+        Write-Log " Failed to export ${DocumentEntity} with lines at doc #$docCount : $_" "WARN"
         return 0
     }
 }
@@ -216,10 +260,10 @@ try {
     }
 
     # Step 1: Get Azure AD token
-    $token = Get-AzureADToken -TenantId $tenantId -ClientId $clientId -ClientSecret $clientSecret
+    Get-AzureADToken | Out-Null
 
     # Step 2: Get companies
-    $companies = Get-Companies -Token $token
+    $companies = Get-Companies
 
     if ($companies.Count -eq 0) {
         Write-Log "No companies found in environment $environmentName" "ERROR"
@@ -262,7 +306,6 @@ try {
         # Export standalone entities
         foreach ($entity in $entities) {
             $count = Export-EntityData `
-                -Token $token `
                 -CompanyId $companyId `
                 -CompanyName $companyName `
                 -EntityName $entity `
@@ -276,12 +319,12 @@ try {
             }
         }
 
-        # Export document entities with their line items via $expand
+        # Export document entities with their line items
+        # Uses small batches + streaming to disk to handle large datasets
         foreach ($docEntity in $documentEntities.Keys) {
             $lineEntity = $documentEntities[$docEntity]
 
             $count = Export-DocumentWithLines `
-                -Token $token `
                 -CompanyId $companyId `
                 -CompanyName $companyName `
                 -DocumentEntity $docEntity `