Skip to content

Instantly share code, notes, and snippets.

@scriptingstudio
Last active August 27, 2023 18:32
Show Gist options
  • Select an option

  • Save scriptingstudio/6e72598d3a069285be65fcc96c25d13c to your computer and use it in GitHub Desktop.

Select an option

Save scriptingstudio/6e72598d3a069285be65fcc96c25d13c to your computer and use it in GitHub Desktop.
A simple way to get Excel content without Excel installed
<#
.SYNOPSIS
Reads data from Excel file into PowerShell object.
.DESCRIPTION
Uses OLEDB provider to execute a SQL statement against a XLSX file to read data. This cmdlet assumes a file path will be passed in and treating all results as text.
Features:
* Can read entire workbook
* Can get workbook sheet names
* Parameterized query
* Parameterized connectionstring
* Custom properties for output object
* Can skip first lines of output
* Can skip checking OleDB provider
.INPUTS
String.
.OUTPUTS
Object for a single worksheet.
Hashtable for multiple worksheets.
.PARAMETER Path
Specifies a path to the file to open.
.PARAMETER Query
Specifies a SQL statement. Note that when you use double quotes in a query string you must insert backtick char before the dollar char. In some cases you can use "%sheet%" as a substitution alias for the sheet name.
.PARAMETER SheetName
Specifies a sheet name to read from workbook. The value of "*" makes the cmdlet to get data from all sheets.
.PARAMETER Property
Specifies an alternate column header row for the imported data.
.PARAMETER Skip
Specifies a number of the first lines to remove from output.
.PARAMETER List
Indicates to get a sheet name list.
.PARAMETER Hdr
Connectionstring parameter. Indicates that the first row contains columnnames, not data.
.PARAMETER Imex
Connectionstring parameter. Specifies Im[port] Ex[port] mode. Valid values are 0,1,2. Default is 1.
.PARAMETER NoCheck
Indicates to skip checking OLEDB provider.
.EXAMPLE
Invoke-ExcelQuery .\test.xlsx 'select ROUND(F1) as [A1] from [sheet3$A1:A1]'
.EXAMPLE
Invoke-ExcelQuery .\test.xlsx 'select * from [%sheet%$]' -Nocheck -Skip 4
.NOTES
It is impossible to get Excel sheet names in workbook sheet order.
#>
function Invoke-ExcelQuery {
[alias('Import-Excel')]
param (
[Parameter(Mandatory)][ValidateNotNullOrEmpty()]
[String] $Path,
[Parameter(Mandatory)][ValidateNotNullOrEmpty()]
[String] $Query,
[String[]] $SheetName, # query parameter; TODO: array
[String[]] $Property, # output parameter
[int] $Skip, # skip first $skip records; output parameter
[Switch] $List, # retrieve sheetname list; command
[Switch] $Hdr, # connectionstring parameter
[Int] $Imex = 1, # connectionstring parameter
[String] $ConOptions, # experimental; connectionstring parameter
[Switch] $NoCheck # skip checking OleDB provider
)
try {
if (-not $NoCheck -and ([System.Data.OleDb.OleDbEnumerator]::new()).GetElements().SOURCES_NAME -notcontains "Microsoft.ACE.OLEDB.12.0") {
Write-Warning "Microsoft.ACE.OLEDB.12.0 provider is missing! You will not be able to query Excel files without it. Please install from https://www.microsoft.com/en-us/download/details.aspx?id=54920"
}
}
catch {
Write-Error "System.Data.OleDb is not working or you are on an unsupported platform."
return
}
$sheetlist = {
param ($ConnectionString)
$Connection = [System.Data.OleDb.OleDbConnection]::new($ConnectionString)
$Connection.Open()
$schema = $Connection.GetOleDbSchemaTable([System.Data.OleDb.OleDbSchemaGuid]::Tables, $null)
if ($schema) {
for ($i = 0; $i -lt $schema.Rows.Count; $i++) {
$worksheetName = $schema.Rows[$i]['TABLE_NAME'] #.ToString()
if ($worksheetName -match 'FilterData') {continue}
$worksheetName.Replace("'", ' ').Trim().TrimEnd('$')
}
}
$Connection.Close()
} # end scriptblock
# Compose the connection string
$hdrflag = if ($Hdr) {'Yes'} else {'NO'}
if ($Imex -lt 0 -or $Imex -gt 2) {$Imex = 1}
$FullName = (Get-ChildItem $Path).FullName
if ($ConOptions -and $ConOptions -notmatch ';$') {$ConOptions = "$ConOptions;"}
$constring = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=$FullName;Extended Properties='Excel 12.0 Xml;HDR=$hdrflag;IMEX=$Imex;Persist Security Info=False;$ConOptions'"
# Get all sheet names
if ($List) {
return @(. $sheetlist $constring)
}
# Get all sheets
if ($SheetName -eq '*' -or $SheetName.count -gt 1) {
if ($Query -notmatch '%sheet%') {
$Query = $Query -replace '\[.+\$(.*)\]','[%sheet%$$$1]'
}
$param = @{} + $PSBoundParameters
$param.remove('SheetName')
$param.remove('Property')
$param['NoCheck'] = $true
$param['Query'] = $Query
. $sheetlist $constring | ForEach-Object {
if ($SheetName -ne '*' -and $_ -notin $SheetName) {return} # continue
@{
Name = $_
Data = Invoke-ExcelQuery -SheetName $_ @param
}
}
return
}
# Adjust the query string
if ($SheetName) {
$shlist = . $sheetlist $constring
if ($SheetName -in $shlist) {
$Query = $Query.replace('%sheet%',$SheetName)
} else {
Write-Warning "The Excel workbook does not contain a sheet '$SheetName'."
return
}
} elseif ($Query -match '%sheet%') {
$shlist = . $sheetlist $constring
$Query = $Query.replace('%sheet%',@($shlist)[0])
} else {
$SheetName = if ($Query -match 'from \[(.+)\$\]') {$matches[1]}
}
# Getting Excel data
$DataTable = [System.Data.DataTable]::new()
$DataAdapter = [System.Data.OleDb.OleDbDataAdapter]::new($Query, $constring)
$null = $DataAdapter.Fill($DataTable)
$null = $DataAdapter.Dispose()
$Properties = if ($Property) {
$cols = @($DataTable.Columns.ColumnName)
if ($Property -eq '*') { # add new columns
$star = $Property.IndexOf('*')
if ($star -eq 0) {
$cols + ($Property -ne '*')
} else {
@($Property -ne '*') + $cols
}
} else { # property name converter
for ($i=0; $i -lt $Property.count; $i++) {
if ($cols[$i] -ne $null) {
@{name = $Property[$i]; expression = [scriptblock]::Create("`$_.$($cols[$i])")}
} else {$Property[$i]}
}
}
} else {$DataTable.Columns.ColumnName} # default output structure
$param = @{Property = $Properties}
if ($Skip -gt 0) {$param['Skip'] = $Skip}
$DataTable.Rows | Select-Object @param
} # END Invoke-ExcelQuery
# read the first sheet (in alphabetical order)
# NOTE: if you use double quotes in query string you must insert backtick char before the dollar char
# NOTE: %sheet% is a substitution alias for the sheet name
Invoke-ExcelQuery -path <your_excel_file> -Query "select * from [%sheet%`$]"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment