fix: drop $expand, fetch document lines per-document instead

The $expand approach had two fatal problems:
1. Combining $top=50 with $expand made BC treat $top as a hard limit
   and omit @odata.nextLink, so only 50 docs were exported in total
2. salesOrders with $expand timed out even at 50 docs per page when
   orders had many lines

New approach: fetch document headers normally (BC paginates fine on
its own), then for each document fetch its lines separately via the
nested endpoint, e.g. /salesInvoices({id})/salesInvoiceLines. This
makes more API calls, but each one is small, fast, and reliable.

Also added:
- Invoke-BCApi with retry logic (backoff on 429/5xx/timeout)
- Separate output files: headers in {entity}.jsonl, lines in
  {lineEntity}.jsonl
- Partial data is preserved if export fails mid-way
- Progress logged every 100 documents

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-16 09:20:52 +01:00
parent 5ebfc3f443
commit d08250a479

View File

@@ -37,8 +37,8 @@ $entities = @(
)
# Document entities with line items
# Lines cannot be queried standalone; they require a parent document ID
# We fetch documents in small batches with $expand to include lines
# Lines cannot be queried standalone at the top level.
# We fetch document headers first, then fetch lines per document.
$documentEntities = @{
"salesInvoices" = "salesInvoiceLines"
"salesOrders" = "salesOrderLines"
@@ -47,9 +47,6 @@ $documentEntities = @{
"purchaseOrders" = "purchaseOrderLines"
}
# Page size for $expand queries - kept small to avoid timeouts/OOM on large datasets
$expandPageSize = 50
# Token management
$script:currentToken = $null
$script:tokenExpiry = [datetime]::MinValue
@@ -77,7 +74,7 @@ function Get-AzureADToken {
$script:currentToken = $response.access_token
# Refresh 5 minutes before actual expiry (tokens typically last 60-90 min)
$script:tokenExpiry = (Get-Date).AddSeconds($response.expires_in - 300)
Write-Log "Successfully authenticated to Azure AD (expires in $($response.expires_in)s)"
Write-Log "Successfully authenticated (token valid for $($response.expires_in)s)"
return $script:currentToken
}
catch {
@@ -94,34 +91,58 @@ function Get-ValidToken {
return $script:currentToken
}
function Get-BCData {
function Invoke-BCApi {
param(
[string]$Url
[string]$Url,
[int]$TimeoutSec = 120,
[int]$MaxRetries = 3
)
for ($attempt = 1; $attempt -le $MaxRetries; $attempt++) {
$token = Get-ValidToken
$headers = @{
"Authorization" = "Bearer $token"
"Accept" = "application/json"
}
$allRecords = @()
$currentUrl = $Url
while ($currentUrl) {
try {
$response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300
$response = Invoke-RestMethod -Uri $Url -Method Get -Headers $headers -TimeoutSec $TimeoutSec
return $response
}
catch {
Write-Log "API request failed for $currentUrl : $_" "ERROR"
$statusCode = $null
if ($_.Exception.Response) {
$statusCode = [int]$_.Exception.Response.StatusCode
}
# Retry on 429 (throttled) or 5xx (server error) or timeout
$isRetryable = ($statusCode -eq 429) -or ($statusCode -ge 500) -or ($_ -match "Timeout")
if ($isRetryable -and $attempt -lt $MaxRetries) {
$wait = $attempt * 10
Write-Log " Request failed (attempt $attempt/$MaxRetries), retrying in ${wait}s..." "WARN"
Start-Sleep -Seconds $wait
continue
}
throw
}
}
}
function Get-BCData {
param(
[string]$Url
)
$allRecords = @()
$currentUrl = $Url
while ($currentUrl) {
$response = Invoke-BCApi -Url $currentUrl
if ($response.value) {
$allRecords += $response.value
}
# Handle OData pagination
$currentUrl = $response.'@odata.nextLink'
}
@@ -176,54 +197,74 @@ function Export-DocumentWithLines {
[string]$OutputDir
)
# Use $expand with small $top to avoid huge payloads
# BC API will provide @odata.nextLink for subsequent pages
$entityUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity" + '?$expand=' + $LineEntity + '&$top=' + $expandPageSize
Write-Log " Exporting $DocumentEntity (headers)..."
Write-Log " Exporting $DocumentEntity (with $LineEntity, batch size $expandPageSize)..."
$docFile = Join-Path $OutputDir "$DocumentEntity.jsonl"
$lineFile = Join-Path $OutputDir "$LineEntity.jsonl"
[System.IO.File]::WriteAllText($docFile, "")
[System.IO.File]::WriteAllText($lineFile, "")
$outputFile = Join-Path $OutputDir "$DocumentEntity.jsonl"
$docCount = 0
$lineCount = 0
$currentUrl = $entityUrl
try {
# Clear output file
[System.IO.File]::WriteAllText($outputFile, "")
# Step 1: Fetch document headers page by page (no $expand)
# BC API default page size is ~100, with @odata.nextLink for more
$currentUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity"
while ($currentUrl) {
$token = Get-ValidToken
$headers = @{
"Authorization" = "Bearer $token"
"Accept" = "application/json"
$response = Invoke-BCApi -Url $currentUrl
if (-not $response.value -or $response.value.Count -eq 0) {
break
}
# Step 2: For each document in this page, fetch its lines
foreach ($doc in $response.value) {
$docCount++
$docId = $doc.id
# Write document header to disk
$jsonLine = $doc | ConvertTo-Json -Depth 10 -Compress
[System.IO.File]::AppendAllText($docFile, $jsonLine + "`n")
# Fetch lines for this document
$linesUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity($docId)/$LineEntity"
try {
$response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300
$linesResponse = Invoke-BCApi -Url $linesUrl -TimeoutSec 60
if ($linesResponse.value -and $linesResponse.value.Count -gt 0) {
foreach ($line in $linesResponse.value) {
$lineCount++
$lineJson = $line | ConvertTo-Json -Depth 10 -Compress
[System.IO.File]::AppendAllText($lineFile, $lineJson + "`n")
}
# Handle pagination within lines (unlikely but possible)
$nextLinesUrl = $linesResponse.'@odata.nextLink'
while ($nextLinesUrl) {
$moreLinesResponse = Invoke-BCApi -Url $nextLinesUrl -TimeoutSec 60
if ($moreLinesResponse.value) {
foreach ($line in $moreLinesResponse.value) {
$lineCount++
$lineJson = $line | ConvertTo-Json -Depth 10 -Compress
[System.IO.File]::AppendAllText($lineFile, $lineJson + "`n")
}
}
$nextLinesUrl = $moreLinesResponse.'@odata.nextLink'
}
}
}
catch {
Write-Log " API request failed at doc #$docCount : $_" "ERROR"
throw
}
if ($response.value) {
foreach ($doc in $response.value) {
$lines = 0
if ($doc.$LineEntity) {
$lines = $doc.$LineEntity.Count
}
$lineCount += $lines
$docCount++
# Write each document immediately to disk (JSONL: one JSON object per line)
$jsonLine = $doc | ConvertTo-Json -Depth 10 -Compress
[System.IO.File]::AppendAllText($outputFile, $jsonLine + "`n")
}
Write-Log " Warning: failed to fetch lines for $DocumentEntity $docId : $_" "WARN"
}
# Progress every 100 documents
if ($docCount % 100 -eq 0) {
Write-Log " Progress: $docCount documents, $lineCount lines"
}
}
# Next page
# Next page of documents
$currentUrl = $response.'@odata.nextLink'
}
@@ -231,8 +272,9 @@ function Export-DocumentWithLines {
return ($docCount + $lineCount)
}
catch {
Write-Log " Failed to export ${DocumentEntity} with lines at doc #$docCount : $_" "WARN"
return 0
Write-Log " Failed to export ${DocumentEntity} at doc #$docCount : $_" "WARN"
Write-Log " Partial data saved ($docCount docs, $lineCount lines)" "WARN"
return ($docCount + $lineCount)
}
}
@@ -312,7 +354,6 @@ try {
}
# Export document entities with their line items
# Uses small batches + streaming to disk to handle large datasets
foreach ($docEntity in $documentEntities.Keys) {
$lineEntity = $documentEntities[$docEntity]