fix: stream document+lines export to disk in small batches
The previous $expand approach tried to load all documents with lines into memory at once, causing hangs/OOM on companies with hundreds of thousands of records.

Changes:
- Fetch documents with $expand in small pages ($top=50) instead of loading everything into memory
- Stream each document to disk immediately as JSONL (one JSON object per line) instead of accumulating in an array
- Add automatic token refresh for long-running exports (tokens expire after ~60 min)
- Add 300s timeout per API request to detect stalls
- Log progress after each batch so you can see it's working

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
125
bc-export.ps1
125
bc-export.ps1
@@ -36,8 +36,9 @@ $entities = @(
|
||||
"countriesRegions"
|
||||
)
|
||||
|
||||
# Document entities with line items - fetched with $expand to include lines
|
||||
# Document entities with line items
|
||||
# Lines cannot be queried standalone; they require a parent document ID
|
||||
# We fetch documents in small batches with $expand to include lines
|
||||
$documentEntities = @{
|
||||
"salesInvoices" = "salesInvoiceLines"
|
||||
"salesOrders" = "salesOrderLines"
|
||||
@@ -46,6 +47,13 @@ $documentEntities = @{
|
||||
"purchaseOrders" = "purchaseOrderLines"
|
||||
}
|
||||
|
||||
# Page size for $expand queries - kept small to avoid timeouts/OOM on large datasets
|
||||
$expandPageSize = 50
|
||||
|
||||
# Token management
|
||||
$script:currentToken = $null
|
||||
$script:tokenExpiry = [datetime]::MinValue
|
||||
|
||||
function Write-Log {
|
||||
param([string]$Message, [string]$Level = "INFO")
|
||||
$timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
|
||||
@@ -53,27 +61,24 @@ function Write-Log {
|
||||
}
|
||||
|
||||
function Get-AzureADToken {
|
||||
param(
|
||||
[string]$TenantId,
|
||||
[string]$ClientId,
|
||||
[string]$ClientSecret
|
||||
)
|
||||
|
||||
Write-Log "Authenticating to Azure AD..."
|
||||
|
||||
$tokenUrl = "https://login.microsoftonline.com/$TenantId/oauth2/v2.0/token"
|
||||
$tokenUrl = "https://login.microsoftonline.com/$tenantId/oauth2/v2.0/token"
|
||||
|
||||
$body = @{
|
||||
client_id = $ClientId
|
||||
client_secret = $ClientSecret
|
||||
client_id = $clientId
|
||||
client_secret = $clientSecret
|
||||
scope = "https://api.businesscentral.dynamics.com/.default"
|
||||
grant_type = "client_credentials"
|
||||
}
|
||||
|
||||
try {
|
||||
$response = Invoke-RestMethod -Uri $tokenUrl -Method Post -Body $body -ContentType "application/x-www-form-urlencoded"
|
||||
Write-Log "Successfully authenticated to Azure AD"
|
||||
return $response.access_token
|
||||
$script:currentToken = $response.access_token
|
||||
# Refresh 5 minutes before actual expiry (tokens typically last 60-90 min)
|
||||
$script:tokenExpiry = (Get-Date).AddSeconds($response.expires_in - 300)
|
||||
Write-Log "Successfully authenticated to Azure AD (expires in $($response.expires_in)s)"
|
||||
return $script:currentToken
|
||||
}
|
||||
catch {
|
||||
Write-Log "Failed to authenticate: $_" "ERROR"
|
||||
@@ -81,14 +86,22 @@ function Get-AzureADToken {
|
||||
}
|
||||
}
|
||||
|
||||
# Returns the bearer token cached in script scope, re-authenticating first
# when no token is cached or the stored expiry time has been reached.
# The cached values ($script:currentToken / $script:tokenExpiry) are assigned
# inside Get-AzureADToken; its own return value is discarded here.
function Get-ValidToken {
|
||||
# Refresh when nothing is cached yet, or when "now" is at/after the stored expiry.
if ($null -eq $script:currentToken -or (Get-Date) -ge $script:tokenExpiry) {
|
||||
Write-Log "Token expired or missing, refreshing..."
|
||||
# Out-Null keeps the token string out of this function's output stream;
# the refreshed value is read back from script scope below.
Get-AzureADToken | Out-Null
|
||||
}
|
||||
return $script:currentToken
|
||||
}
|
||||
|
||||
function Get-BCData {
|
||||
param(
|
||||
[string]$Token,
|
||||
[string]$Url
|
||||
)
|
||||
|
||||
$token = Get-ValidToken
|
||||
$headers = @{
|
||||
"Authorization" = "Bearer $Token"
|
||||
"Authorization" = "Bearer $token"
|
||||
"Accept" = "application/json"
|
||||
}
|
||||
|
||||
@@ -97,7 +110,7 @@ function Get-BCData {
|
||||
$currentUrl = $Url
|
||||
while ($currentUrl) {
|
||||
try {
|
||||
$response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers
|
||||
$response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300
|
||||
}
|
||||
catch {
|
||||
Write-Log "API request failed for $currentUrl : $_" "ERROR"
|
||||
@@ -116,18 +129,15 @@ function Get-BCData {
|
||||
}
|
||||
|
||||
# Fetches the company list from "$baseUrl/companies" through Get-BCData,
# logs how many entries came back, and returns them to the caller.
# NOTE(review): this span is a rendered diff, so two Get-BCData calls appear
# below - one passing -Token $Token and one without it. They look like the
# removed and added versions of the same line; confirm against the real file.
function Get-Companies {
|
||||
param([string]$Token)
|
||||
|
||||
Write-Log "Fetching companies..."
|
||||
$companiesUrl = "$baseUrl/companies"
|
||||
$companies = Get-BCData -Token $Token -Url $companiesUrl
|
||||
$companies = Get-BCData -Url $companiesUrl
|
||||
Write-Log "Found $($companies.Count) company/companies"
|
||||
return $companies
|
||||
}
|
||||
|
||||
function Export-EntityData {
|
||||
param(
|
||||
[string]$Token,
|
||||
[string]$CompanyId,
|
||||
[string]$CompanyName,
|
||||
[string]$EntityName,
|
||||
@@ -139,7 +149,7 @@ function Export-EntityData {
|
||||
Write-Log " Exporting $EntityName..."
|
||||
|
||||
try {
|
||||
$data = Get-BCData -Token $Token -Url $entityUrl
|
||||
$data = Get-BCData -Url $entityUrl
|
||||
$count = 0
|
||||
if ($data) { $count = $data.Count }
|
||||
|
||||
@@ -151,7 +161,6 @@ function Export-EntityData {
|
||||
}
|
||||
catch {
|
||||
Write-Log " Failed to export ${EntityName}: $_" "WARN"
|
||||
# Write empty array so downstream knows it was attempted
|
||||
$outputFile = Join-Path $OutputDir "$EntityName.json"
|
||||
"[]" | Out-File -FilePath $outputFile -Encoding utf8
|
||||
return 0
|
||||
@@ -160,7 +169,6 @@ function Export-EntityData {
|
||||
|
||||
function Export-DocumentWithLines {
|
||||
param(
|
||||
[string]$Token,
|
||||
[string]$CompanyId,
|
||||
[string]$CompanyName,
|
||||
[string]$DocumentEntity,
|
||||
@@ -168,34 +176,62 @@ function Export-DocumentWithLines {
|
||||
[string]$OutputDir
|
||||
)
|
||||
|
||||
$entityUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity" + '?$expand=' + $LineEntity
|
||||
# Use $expand with small $top to avoid huge payloads
|
||||
# BC API will provide @odata.nextLink for subsequent pages
|
||||
$entityUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity" + '?$expand=' + $LineEntity + '&$top=' + $expandPageSize
|
||||
|
||||
Write-Log " Exporting $DocumentEntity (with $LineEntity)..."
|
||||
Write-Log " Exporting $DocumentEntity (with $LineEntity, batch size $expandPageSize)..."
|
||||
|
||||
$outputFile = Join-Path $OutputDir "$DocumentEntity.jsonl"
|
||||
$docCount = 0
|
||||
$lineCount = 0
|
||||
$currentUrl = $entityUrl
|
||||
|
||||
try {
|
||||
$data = Get-BCData -Token $Token -Url $entityUrl
|
||||
$docCount = 0
|
||||
$lineCount = 0
|
||||
if ($data) {
|
||||
$docCount = $data.Count
|
||||
foreach ($doc in $data) {
|
||||
if ($doc.$LineEntity) {
|
||||
$lineCount += $doc.$LineEntity.Count
|
||||
# Clear output file
|
||||
[System.IO.File]::WriteAllText($outputFile, "")
|
||||
|
||||
while ($currentUrl) {
|
||||
$token = Get-ValidToken
|
||||
$headers = @{
|
||||
"Authorization" = "Bearer $token"
|
||||
"Accept" = "application/json"
|
||||
}
|
||||
|
||||
try {
|
||||
$response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300
|
||||
}
|
||||
catch {
|
||||
Write-Log " API request failed at doc #$docCount : $_" "ERROR"
|
||||
throw
|
||||
}
|
||||
|
||||
if ($response.value) {
|
||||
foreach ($doc in $response.value) {
|
||||
$lines = 0
|
||||
if ($doc.$LineEntity) {
|
||||
$lines = $doc.$LineEntity.Count
|
||||
}
|
||||
$lineCount += $lines
|
||||
$docCount++
|
||||
|
||||
# Write each document immediately to disk (JSONL: one JSON object per line)
|
||||
$jsonLine = $doc | ConvertTo-Json -Depth 10 -Compress
|
||||
[System.IO.File]::AppendAllText($outputFile, $jsonLine + "`n")
|
||||
}
|
||||
}
|
||||
|
||||
Write-Log " Progress: $docCount documents, $lineCount lines"
|
||||
|
||||
# Next page
|
||||
$currentUrl = $response.'@odata.nextLink'
|
||||
}
|
||||
|
||||
# Save the documents (with lines embedded)
|
||||
$outputFile = Join-Path $OutputDir "$DocumentEntity.json"
|
||||
$data | ConvertTo-Json -Depth 10 | Out-File -FilePath $outputFile -Encoding utf8
|
||||
|
||||
Write-Log " $DocumentEntity : $docCount documents, $lineCount lines"
|
||||
Write-Log " $DocumentEntity : $docCount documents, $lineCount lines (complete)"
|
||||
return ($docCount + $lineCount)
|
||||
}
|
||||
catch {
|
||||
Write-Log " Failed to export ${DocumentEntity} with lines: $_" "WARN"
|
||||
$outputFile = Join-Path $OutputDir "$DocumentEntity.json"
|
||||
"[]" | Out-File -FilePath $outputFile -Encoding utf8
|
||||
Write-Log " Failed to export ${DocumentEntity} with lines at doc #$docCount : $_" "WARN"
|
||||
return 0
|
||||
}
|
||||
}
|
||||
@@ -216,10 +252,10 @@ try {
|
||||
}
|
||||
|
||||
# Step 1: Get Azure AD token
|
||||
$token = Get-AzureADToken -TenantId $tenantId -ClientId $clientId -ClientSecret $clientSecret
|
||||
Get-AzureADToken | Out-Null
|
||||
|
||||
# Step 2: Get companies
|
||||
$companies = Get-Companies -Token $token
|
||||
$companies = Get-Companies
|
||||
|
||||
if ($companies.Count -eq 0) {
|
||||
Write-Log "No companies found in environment $environmentName" "ERROR"
|
||||
@@ -262,7 +298,6 @@ try {
|
||||
# Export standalone entities
|
||||
foreach ($entity in $entities) {
|
||||
$count = Export-EntityData `
|
||||
-Token $token `
|
||||
-CompanyId $companyId `
|
||||
-CompanyName $companyName `
|
||||
-EntityName $entity `
|
||||
@@ -276,12 +311,12 @@ try {
|
||||
}
|
||||
}
|
||||
|
||||
# Export document entities with their line items via $expand
|
||||
# Export document entities with their line items
|
||||
# Uses small batches + streaming to disk to handle large datasets
|
||||
foreach ($docEntity in $documentEntities.Keys) {
|
||||
$lineEntity = $documentEntities[$docEntity]
|
||||
|
||||
$count = Export-DocumentWithLines `
|
||||
-Token $token `
|
||||
-CompanyId $companyId `
|
||||
-CompanyName $companyName `
|
||||
-DocumentEntity $docEntity `
|
||||
|
||||
Reference in New Issue
Block a user