From d08250a47915ef065e3b80a9e6fe8c783a5cfc1f Mon Sep 17 00:00:00 2001 From: Malin Date: Mon, 16 Feb 2026 09:20:52 +0100 Subject: [PATCH] fix: drop $expand, fetch document lines per-document instead The $expand approach had two fatal problems: 1. $top=50 with $expand made BC treat it as a hard limit with no @odata.nextLink, so only 50 docs were exported total 2. salesOrders with $expand timed out even at 50 docs when orders have many lines New approach: fetch document headers normally (BC paginates fine on its own), then for each document fetch its lines separately via /salesInvoices({id})/salesInvoiceLines. More API calls but each is small, fast, and reliable. Also added: - Invoke-BCApi with retry logic (backoff on 429/5xx/timeout) - Separate output files: headers in {entity}.jsonl, lines in {lineEntity}.jsonl - Partial data is preserved if export fails mid-way - Progress logged every 100 documents Co-Authored-By: Claude Opus 4.6 --- bc-export.ps1 | 153 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 97 insertions(+), 56 deletions(-) diff --git a/bc-export.ps1 b/bc-export.ps1 index 79eac4d..84f3ae4 100755 --- a/bc-export.ps1 +++ b/bc-export.ps1 @@ -37,8 +37,8 @@ $entities = @( ) # Document entities with line items -# Lines cannot be queried standalone; they require a parent document ID -# We fetch documents in small batches with $expand to include lines +# Lines cannot be queried standalone at the top level. +# We fetch document headers first, then fetch lines per document. $documentEntities = @{ "salesInvoices" = "salesInvoiceLines" "salesOrders" = "salesOrderLines" @@ -47,9 +47,6 @@ $documentEntities = @{ "purchaseOrders" = "purchaseOrderLines" } -# Page size for $expand queries - kept small to avoid timeouts/OOM on large datasets -$expandPageSize = 50 - # Token management $script:currentToken = $null $script:tokenExpiry = [datetime]::MinValue @@ -77,7 +74,7 @@ function Get-AzureADToken { $script:currentToken = $response.access_token # Refresh 5 minutes before actual expiry (tokens typically last 60-90 min) $script:tokenExpiry = (Get-Date).AddSeconds($response.expires_in - 300) - Write-Log "Successfully authenticated to Azure AD (expires in $($response.expires_in)s)" + Write-Log "Successfully authenticated (token valid for $($response.expires_in)s)" return $script:currentToken } catch { @@ -94,34 +91,58 @@ function Get-ValidToken { return $script:currentToken } +function Invoke-BCApi { + param( + [string]$Url, + [int]$TimeoutSec = 120, + [int]$MaxRetries = 3 + ) + + for ($attempt = 1; $attempt -le $MaxRetries; $attempt++) { + $token = Get-ValidToken + $headers = @{ + "Authorization" = "Bearer $token" + "Accept" = "application/json" + } + + try { + $response = Invoke-RestMethod -Uri $Url -Method Get -Headers $headers -TimeoutSec $TimeoutSec + return $response + } + catch { + $statusCode = $null + if ($_.Exception.Response) { + $statusCode = [int]$_.Exception.Response.StatusCode + } + + # Retry on 429 (throttled) or 5xx (server error) or timeout + $isRetryable = ($statusCode -eq 429) -or ($statusCode -ge 500) -or ($_ -match "Timeout") + if ($isRetryable -and $attempt -lt $MaxRetries) { + $wait = $attempt * 10 + Write-Log " Request failed (attempt $attempt/$MaxRetries), retrying in ${wait}s..." "WARN" + Start-Sleep -Seconds $wait + continue + } + throw + } + } +} + function Get-BCData { param( [string]$Url ) - $token = Get-ValidToken - $headers = @{ - "Authorization" = "Bearer $token" - "Accept" = "application/json" - } - $allRecords = @() - $currentUrl = $Url + while ($currentUrl) { - try { - $response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300 - } - catch { - Write-Log "API request failed for $currentUrl : $_" "ERROR" - throw - } + $response = Invoke-BCApi -Url $currentUrl if ($response.value) { $allRecords += $response.value } - # Handle OData pagination $currentUrl = $response.'@odata.nextLink' } @@ -176,54 +197,74 @@ function Export-DocumentWithLines { [string]$OutputDir ) - # Use $expand with small $top to avoid huge payloads - # BC API will provide @odata.nextLink for subsequent pages - $entityUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity" + '?$expand=' + $LineEntity + '&$top=' + $expandPageSize + Write-Log " Exporting $DocumentEntity (headers)..." - Write-Log " Exporting $DocumentEntity (with $LineEntity, batch size $expandPageSize)..." + $docFile = Join-Path $OutputDir "$DocumentEntity.jsonl" + $lineFile = Join-Path $OutputDir "$LineEntity.jsonl" + [System.IO.File]::WriteAllText($docFile, "") + [System.IO.File]::WriteAllText($lineFile, "") - $outputFile = Join-Path $OutputDir "$DocumentEntity.jsonl" $docCount = 0 $lineCount = 0 - $currentUrl = $entityUrl try { - # Clear output file - [System.IO.File]::WriteAllText($outputFile, "") + # Step 1: Fetch document headers page by page (no $expand) + # BC API default page size is ~100, with @odata.nextLink for more + $currentUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity" while ($currentUrl) { - $token = Get-ValidToken - $headers = @{ - "Authorization" = "Bearer $token" - "Accept" = "application/json" + $response = Invoke-BCApi -Url $currentUrl + + if (-not $response.value -or $response.value.Count -eq 0) { + break } - try { - $response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300 - } - catch { - Write-Log " API request failed at doc #$docCount : $_" "ERROR" - throw - } + # Step 2: For each document in this page, fetch its lines + foreach ($doc in $response.value) { + $docCount++ + $docId = $doc.id - if ($response.value) { - foreach ($doc in $response.value) { - $lines = 0 - if ($doc.$LineEntity) { - $lines = $doc.$LineEntity.Count + # Write document header to disk + $jsonLine = $doc | ConvertTo-Json -Depth 10 -Compress + [System.IO.File]::AppendAllText($docFile, $jsonLine + "`n") + + # Fetch lines for this document + $linesUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity($docId)/$LineEntity" + try { + $linesResponse = Invoke-BCApi -Url $linesUrl -TimeoutSec 60 + if ($linesResponse.value -and $linesResponse.value.Count -gt 0) { + foreach ($line in $linesResponse.value) { + $lineCount++ + $lineJson = $line | ConvertTo-Json -Depth 10 -Compress + [System.IO.File]::AppendAllText($lineFile, $lineJson + "`n") + } + + # Handle pagination within lines (unlikely but possible) + $nextLinesUrl = $linesResponse.'@odata.nextLink' + while ($nextLinesUrl) { + $moreLinesResponse = Invoke-BCApi -Url $nextLinesUrl -TimeoutSec 60 + if ($moreLinesResponse.value) { + foreach ($line in $moreLinesResponse.value) { + $lineCount++ + $lineJson = $line | ConvertTo-Json -Depth 10 -Compress + [System.IO.File]::AppendAllText($lineFile, $lineJson + "`n") + } + } + $nextLinesUrl = $moreLinesResponse.'@odata.nextLink' + } } - $lineCount += $lines - $docCount++ + } + catch { + Write-Log " Warning: failed to fetch lines for $DocumentEntity $docId : $_" "WARN" + } - # Write each document immediately to disk (JSONL: one JSON object per line) - $jsonLine = $doc | ConvertTo-Json -Depth 10 -Compress - [System.IO.File]::AppendAllText($outputFile, $jsonLine + "`n") + # Progress every 100 documents + if ($docCount % 100 -eq 0) { + Write-Log " Progress: $docCount documents, $lineCount lines" } } - Write-Log " Progress: $docCount documents, $lineCount lines" - - # Next page + # Next page of documents $currentUrl = $response.'@odata.nextLink' } @@ -231,8 +272,9 @@ function Export-DocumentWithLines { return ($docCount + $lineCount) } catch { - Write-Log " Failed to export ${DocumentEntity} with lines at doc #$docCount : $_" "WARN" - return 0 + Write-Log " Failed to export ${DocumentEntity} at doc #$docCount : $_" "WARN" + Write-Log " Partial data saved ($docCount docs, $lineCount lines)" "WARN" + return ($docCount + $lineCount) } } @@ -312,7 +354,6 @@ try { } # Export document entities with their line items - # Uses small batches + streaming to disk to handle large datasets foreach ($docEntity in $documentEntities.Keys) { $lineEntity = $documentEntities[$docEntity]