Files
BC-bak/bc-export.ps1
Malin 5ebfc3f443 fix: stream document+lines export to disk in small batches
The previous $expand approach tried to load all documents with lines
into memory at once, causing hangs/OOM on companies with hundreds of
thousands of records.

Changes:
- Fetch documents with $expand in small pages ($top=50) instead of
  loading everything into memory
- Stream each document to disk immediately as JSONL (one JSON object
  per line) instead of accumulating in an array
- Add automatic token refresh for long-running exports (tokens expire
  after ~60 min)
- Add 300s timeout per API request to detect stalls
- Log progress after each batch so you can see it's working

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 09:09:47 +01:00

362 lines
11 KiB
PowerShell
Executable File

#!/usr/bin/env pwsh
#
# Business Central Data Export via BC API v2.0
# Authenticates to Azure AD and extracts critical business data as JSON
#
param(
    # Directory that receives the export; the main section creates it if it is missing.
    [Parameter(Mandatory=$true)]
    [string]$OutputPath
)

# Get configuration from environment variables
$tenantId = $env:AZURE_TENANT_ID
$clientId = $env:AZURE_CLIENT_ID
$clientSecret = $env:AZURE_CLIENT_SECRET
$environmentName = $env:BC_ENVIRONMENT_NAME
$bcCompanyName = $env:BC_COMPANY_NAME # optional: filter to specific company

# Root of every API URL built by the functions below
$baseUrl = "https://api.businesscentral.dynamics.com/v2.0/$tenantId/$environmentName/api/v2.0"

# Standalone entities to extract (each one is written to <entity>.json)
$entities = @(
    "accounts",
    "customers",
    "vendors",
    "items",
    "generalLedgerEntries",
    "bankAccounts",
    "employees",
    "dimensions",
    "dimensionValues",
    "currencies",
    "paymentTerms",
    "paymentMethods",
    "journals",
    "countriesRegions"
)

# Document entities with line items (key = document entity, value = line navigation property)
# Lines cannot be queried standalone; they require a parent document ID
# We fetch documents in small batches with $expand to include lines
# [ordered] keeps enumeration in the declared order; a plain hashtable gives no
# ordering guarantee, which made export and log order vary.
$documentEntities = [ordered]@{
    "salesInvoices" = "salesInvoiceLines"
    "salesOrders" = "salesOrderLines"
    "salesCreditMemos" = "salesCreditMemoLines"
    "purchaseInvoices" = "purchaseInvoiceLines"
    "purchaseOrders" = "purchaseOrderLines"
}

# Page size for $expand queries - kept small to avoid timeouts/OOM on large datasets
$expandPageSize = 50

# Token management: cached bearer token and the time after which it must be refreshed.
# MinValue forces a refresh on first use.
$script:currentToken = $null
$script:tokenExpiry = [datetime]::MinValue
# Prints one log line to the host in the form "[timestamp] [LEVEL] message".
#   Message - text to print
#   Level   - tag shown in the second bracket pair (defaults to "INFO")
function Write-Log {
    param([string]$Message, [string]$Level = "INFO")

    $now = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
    Write-Host ('[{0}] [{1}] {2}' -f $now, $Level, $Message)
}
# Requests a new access token with the client-credentials grant and caches it.
# Side effects: sets $script:currentToken and $script:tokenExpiry.
# Returns the access token string; logs and rethrows if the request fails.
function Get-AzureADToken {
    Write-Log "Authenticating to Azure AD..."

    # All request settings collected in one table and splatted below
    $tokenRequest = @{
        Uri         = "https://login.microsoftonline.com/$tenantId/oauth2/v2.0/token"
        Method      = 'Post'
        ContentType = "application/x-www-form-urlencoded"
        Body        = @{
            client_id = $clientId
            client_secret = $clientSecret
            scope = "https://api.businesscentral.dynamics.com/.default"
            grant_type = "client_credentials"
        }
    }

    try {
        $reply = Invoke-RestMethod @tokenRequest
        $lifetimeSeconds = $reply.expires_in

        $script:currentToken = $reply.access_token
        # Treat the token as expired 300 seconds (5 minutes) before its reported lifetime ends
        $script:tokenExpiry = (Get-Date).AddSeconds($lifetimeSeconds - 300)

        Write-Log "Successfully authenticated to Azure AD (expires in $($lifetimeSeconds)s)"
        return $script:currentToken
    }
    catch {
        Write-Log "Failed to authenticate: $_" "ERROR"
        throw
    }
}
# Returns the cached access token, requesting a new one first when no token
# is cached or the cached one has reached its refresh time.
function Get-ValidToken {
    $haveToken = $null -ne $script:currentToken
    $stillFresh = $haveToken -and ((Get-Date) -lt $script:tokenExpiry)

    if (-not $stillFresh) {
        Write-Log "Token expired or missing, refreshing..."
        $null = Get-AzureADToken
    }

    return $script:currentToken
}
# Fetches every record of an OData collection, following @odata.nextLink
# until the server stops returning one.
#   Url - first page to request
# Returns all records from every page (nothing when the collection is empty).
# Logs and rethrows on any failed request.
function Get-BCData {
    param(
        [string]$Url
    )

    # A growable list avoids re-copying the whole array on every page, as `+=` does
    $allRecords = [System.Collections.Generic.List[object]]::new()
    $currentUrl = $Url

    while ($currentUrl) {
        # Re-check the token on every page so a long multi-page download
        # does not keep sending a token that expired mid-way
        $token = Get-ValidToken
        $headers = @{
            "Authorization" = "Bearer $token"
            "Accept" = "application/json"
        }

        try {
            $response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300
        }
        catch {
            Write-Log "API request failed for $currentUrl : $_" "ERROR"
            throw
        }

        if ($response.value) {
            foreach ($record in $response.value) {
                $allRecords.Add($record)
            }
        }

        # Handle OData pagination
        $currentUrl = $response.'@odata.nextLink'
    }

    return $allRecords.ToArray()
}
# Retrieves the companies collection under $baseUrl, logs how many were
# found and returns them.
function Get-Companies {
    Write-Log "Fetching companies..."

    $found = Get-BCData -Url "$baseUrl/companies"
    $foundCount = $found.Count
    Write-Log "Found $foundCount company/companies"

    return $found
}
# Exports one standalone entity of a company to "<EntityName>.json".
#   CompanyId   - company id used in the API URL
#   CompanyName - accepted for call-site symmetry; not used in this function
#   EntityName  - API entity set name; also the output file's base name
#   OutputDir   - directory the JSON file is written to
# Returns the number of records written. On failure a WARN line is logged,
# an empty JSON array is written, and 0 is returned.
function Export-EntityData {
    param(
        [string]$CompanyId,
        [string]$CompanyName,
        [string]$EntityName,
        [string]$OutputDir
    )

    $entityUrl = "$baseUrl/companies($CompanyId)/$EntityName"
    $outputFile = Join-Path $OutputDir "$EntityName.json"

    Write-Log " Exporting $EntityName..."

    try {
        # @(...) forces an array even for zero or one record, so the count is
        # reliable and the file below always holds a JSON array
        $data = @(Get-BCData -Url $entityUrl)
        $count = $data.Count

        # -InputObject serializes the array as a whole. Piping would unwrap it
        # and write a bare object for one record, or a non-array for none.
        ConvertTo-Json -InputObject $data -Depth 10 | Out-File -FilePath $outputFile -Encoding utf8

        Write-Log " $EntityName : $count records"
        return $count
    }
    catch {
        Write-Log " Failed to export ${EntityName}: $_" "WARN"
        "[]" | Out-File -FilePath $outputFile -Encoding utf8
        return 0
    }
}
# Exports a document entity together with its expanded line items to
# "<DocumentEntity>.jsonl" (one compressed JSON document per line), writing
# each page to disk as it arrives instead of collecting everything in memory.
#   CompanyId      - company id used in the API URL
#   CompanyName    - accepted for call-site symmetry; not used in this function
#   DocumentEntity - API entity set of the parent documents
#   LineEntity     - navigation property passed to $expand
#   OutputDir      - directory the JSONL file is written to
# Returns documents + lines written. On failure a WARN line is logged and 0 is
# returned; whatever was written before the failure stays in the file.
function Export-DocumentWithLines {
    param(
        [string]$CompanyId,
        [string]$CompanyName,
        [string]$DocumentEntity,
        [string]$LineEntity,
        [string]$OutputDir
    )

    # Page size is requested through the Prefer header below rather than $top:
    # in OData, $top caps the TOTAL number of items returned, so using it as a
    # "batch size" risks ending the export after the first $expandPageSize documents.
    $entityUrl = "$baseUrl/companies($CompanyId)/$DocumentEntity" + '?$expand=' + $LineEntity

    Write-Log " Exporting $DocumentEntity (with $LineEntity, batch size $expandPageSize)..."

    $outputFile = Join-Path $OutputDir "$DocumentEntity.jsonl"
    $docCount = 0
    $lineCount = 0
    $currentUrl = $entityUrl
    $writer = $null

    try {
        # .NET file APIs resolve relative paths against the process working
        # directory, which can differ from the PowerShell location, so resolve first
        $fullPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($outputFile)

        # One writer for the whole export (append = $false truncates any old file);
        # UTF-8 without BOM and "`n" line ends match the previous AppendAllText output
        $writer = [System.IO.StreamWriter]::new($fullPath, $false, [System.Text.UTF8Encoding]::new($false))
        $writer.NewLine = "`n"

        while ($currentUrl) {
            # Token is re-checked per page so long exports survive token expiry
            $token = Get-ValidToken
            $headers = @{
                "Authorization" = "Bearer $token"
                "Accept" = "application/json"
                "Prefer" = "odata.maxpagesize=$expandPageSize"
            }

            try {
                $response = Invoke-RestMethod -Uri $currentUrl -Method Get -Headers $headers -TimeoutSec 300
            }
            catch {
                Write-Log " API request failed at doc #$docCount : $_" "ERROR"
                throw
            }

            if ($response.value) {
                foreach ($doc in $response.value) {
                    $lines = 0
                    if ($doc.$LineEntity) {
                        $lines = @($doc.$LineEntity).Count
                    }
                    $lineCount += $lines
                    $docCount++

                    # Write each document immediately (JSONL: one JSON object per line)
                    $writer.WriteLine(($doc | ConvertTo-Json -Depth 10 -Compress))
                }
            }

            # Push the finished page to disk before reporting progress
            $writer.Flush()
            Write-Log " Progress: $docCount documents, $lineCount lines"

            # Next page
            $currentUrl = $response.'@odata.nextLink'
        }

        Write-Log " $DocumentEntity : $docCount documents, $lineCount lines (complete)"
        return ($docCount + $lineCount)
    }
    catch {
        Write-Log " Failed to export ${DocumentEntity} with lines at doc #$docCount : $_" "WARN"
        return 0
    }
    finally {
        # Always release the file handle, on success and on failure
        if ($null -ne $writer) {
            $writer.Dispose()
        }
    }
}
# Main execution
# Authenticates, lists companies, exports every configured entity per company
# into "<OutputPath>/<company>/", then writes export-metadata.json.
# Exit code: 0 when the run completes, 1 when no (matching) company is found
# or an unexpected error occurs.
try {
    Write-Log "========================================="
    Write-Log "BC Data Export Script (API v2.0)"
    Write-Log "========================================="
    Write-Log "Environment: $environmentName"
    Write-Log "Output Path: $OutputPath"
    Write-Log "Entities to extract: $($entities.Count + $documentEntities.Count) ($($documentEntities.Count) with line items)"

    # Create output directory
    $exportDir = $OutputPath
    if (-not (Test-Path $exportDir)) {
        New-Item -ItemType Directory -Path $exportDir -Force | Out-Null
    }

    # Step 1: Get Azure AD token
    Get-AzureADToken | Out-Null

    # Step 2: Get companies
    # @(...) keeps this an array even when exactly one company (or none) comes back
    $companies = @(Get-Companies)
    if ($companies.Count -eq 0) {
        Write-Log "No companies found in environment $environmentName" "ERROR"
        exit 1
    }

    # Save companies list. -InputObject keeps the file a JSON array even for a
    # single company; piping would write a bare object.
    ConvertTo-Json -InputObject $companies -Depth 10 | Out-File -FilePath (Join-Path $exportDir "companies.json") -Encoding utf8

    # Filter to specific company if configured
    $targetCompanies = $companies
    if ($bcCompanyName) {
        $targetCompanies = @($companies | Where-Object { $_.name -eq $bcCompanyName -or $_.displayName -eq $bcCompanyName })
        if ($targetCompanies.Count -eq 0) {
            Write-Log "Company '$bcCompanyName' not found. Available: $($companies.name -join ', ')" "ERROR"
            exit 1
        }
        Write-Log "Filtering to company: $bcCompanyName"
    }

    $totalRecords = 0
    $totalEntities = 0
    # Entities that returned 0 records: covers both failed exports and empty ones,
    # because the export functions return 0 in either case
    $failedEntities = @()

    # Step 3: Export data for each company
    foreach ($company in $targetCompanies) {
        $companyName = $company.name
        $companyId = $company.id
        Write-Log "-----------------------------------------"
        Write-Log "Exporting company: $companyName ($companyId)"

        # Create company directory (sanitize name for filesystem)
        $safeName = $companyName -replace '[\\/:*?"<>|]', '_'
        $companyDir = Join-Path $exportDir $safeName
        if (-not (Test-Path $companyDir)) {
            New-Item -ItemType Directory -Path $companyDir -Force | Out-Null
        }

        # Export standalone entities
        foreach ($entity in $entities) {
            $count = Export-EntityData `
                -CompanyId $companyId `
                -CompanyName $companyName `
                -EntityName $entity `
                -OutputDir $companyDir
            $totalRecords += $count
            $totalEntities++
            if ($count -eq 0) {
                $failedEntities += "$companyName/$entity"
            }
        }

        # Export document entities with their line items
        # Uses small batches + streaming to disk to handle large datasets
        foreach ($docEntity in $documentEntities.Keys) {
            $lineEntity = $documentEntities[$docEntity]
            $count = Export-DocumentWithLines `
                -CompanyId $companyId `
                -CompanyName $companyName `
                -DocumentEntity $docEntity `
                -LineEntity $lineEntity `
                -OutputDir $companyDir
            $totalRecords += $count
            $totalEntities++
            if ($count -eq 0) {
                $failedEntities += "$companyName/$docEntity"
            }
        }
    }

    # Save export metadata
    $metadata = @{
        exportDate = (Get-Date -Format "yyyy-MM-dd HH:mm:ss UTC" -AsUTC)
        environment = $environmentName
        companies = @($targetCompanies | ForEach-Object { $_.name })
        entitiesExported = $totalEntities
        totalRecords = $totalRecords
        failedEntities = $failedEntities
    }
    $metadata | ConvertTo-Json -Depth 5 | Out-File -FilePath (Join-Path $exportDir "export-metadata.json") -Encoding utf8

    Write-Log "========================================="
    Write-Log "Export completed"
    Write-Log "Companies: $($targetCompanies.Count)"
    Write-Log "Entities: $totalEntities"
    Write-Log "Total records: $totalRecords"
    if ($failedEntities.Count -gt 0) {
        Write-Log "Failed/empty: $($failedEntities.Count) entities" "WARN"
    }
    Write-Log "========================================="
    exit 0
}
catch {
    Write-Log "Unexpected error: $_" "ERROR"
    Write-Log "Stack trace: $($_.ScriptStackTrace)" "ERROR"
    exit 1
}