Last active
August 27, 2023 18:32
-
-
Save scriptingstudio/6e72598d3a069285be65fcc96c25d13c to your computer and use it in GitHub Desktop.
A simple way to get Excel content without Excel installed
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <# | |
| .SYNOPSIS | |
| Reads data from Excel file into PowerShell object. | |
| .DESCRIPTION | |
| Uses OLEDB provider to execute a SQL statement against a XLSX file to read data. This cmdlet assumes a file path will be passed in and treating all results as text. | |
| Features: | |
| * Can read entire workbook | |
| * Can get workbook sheet names | |
| * Parameterized query | |
| * Parameterized connectionstring | |
| * Custom properties for output object | |
| * Can skip first lines of output | |
| * Can skip checking OleDB provider | |
| .INPUTS | |
| String. | |
| .OUTPUTS | |
| Object for a single worksheet. | |
| Hashtable for multiple worksheets. | |
| .PARAMETER Path | |
| Specifies a path to the file to open. | |
| .PARAMETER Query | |
| Specifies a SQL statement. Note that when you use double quotes in a query string you must insert backtick char before the dollar char. In some cases you can use "%sheet%" as a substitution alias for the sheet name. | |
| .PARAMETER SheetName | |
| Specifies a sheet name to read from workbook. The value of "*" makes the cmdlet to get data from all sheets. | |
| .PARAMETER Property | |
| Specifies an alternate column header row for the imported data. | |
| .PARAMETER Skip | |
| Specifies a number of the first lines to remove from output. | |
| .PARAMETER List | |
| Indicates to get a sheet name list. | |
| .PARAMETER Hdr | |
| Connectionstring parameter. Indicates that the first row contains columnnames, not data. | |
| .PARAMETER Imex | |
| Connectionstring parameter. Specifies Im[port] Ex[port] mode. Valid values are 0,1,2. Default is 1. | |
| .PARAMETER NoCheck | |
| Indicates to skip checking OLEDB provider. | |
| .EXAMPLE | |
| Invoke-ExcelQuery .\test.xlsx 'select ROUND(F1) as [A1] from [sheet3$A1:A1]' | |
| .EXAMPLE | |
| Invoke-ExcelQuery .\test.xlsx 'select * from [%sheet%$]' -Nocheck -Skip 4 | |
| .NOTES | |
| It is impossible to get Excel sheet names in workbook sheet order. | |
| #> | |
| function Invoke-ExcelQuery { | |
| [alias('Import-Excel')] | |
| param ( | |
| [Parameter(Mandatory)][ValidateNotNullOrEmpty()] | |
| [String] $Path, | |
| [Parameter(Mandatory)][ValidateNotNullOrEmpty()] | |
| [String] $Query, | |
| [String[]] $SheetName, # query parameter; TODO: array | |
| [String[]] $Property, # output parameter | |
| [int] $Skip, # skip first $skip records; output parameter | |
| [Switch] $List, # retrieve sheetname list; command | |
| [Switch] $Hdr, # connectionstring parameter | |
| [Int] $Imex = 1, # connectionstring parameter | |
| [String] $ConOptions, # experimental; connectionstring parameter | |
| [Switch] $NoCheck # skip checking OleDB provider | |
| ) | |
| try { | |
| if (-not $NoCheck -and ([System.Data.OleDb.OleDbEnumerator]::new()).GetElements().SOURCES_NAME -notcontains "Microsoft.ACE.OLEDB.12.0") { | |
| Write-Warning "Microsoft.ACE.OLEDB.12.0 provider is missing! You will not be able to query Excel files without it. Please install from https://www.microsoft.com/en-us/download/details.aspx?id=54920" | |
| } | |
| } | |
| catch { | |
| Write-Error "System.Data.OleDb is not working or you are on an unsupported platform." | |
| return | |
| } | |
| $sheetlist = { | |
| param ($ConnectionString) | |
| $Connection = [System.Data.OleDb.OleDbConnection]::new($ConnectionString) | |
| $Connection.Open() | |
| $schema = $Connection.GetOleDbSchemaTable([System.Data.OleDb.OleDbSchemaGuid]::Tables, $null) | |
| if ($schema) { | |
| for ($i = 0; $i -lt $schema.Rows.Count; $i++) { | |
| $worksheetName = $schema.Rows[$i]['TABLE_NAME'] #.ToString() | |
| if ($worksheetName -match 'FilterData') {continue} | |
| $worksheetName.Replace("'", ' ').Trim().TrimEnd('$') | |
| } | |
| } | |
| $Connection.Close() | |
| } # end scriptblock | |
| # Compose the connection string | |
| $hdrflag = if ($Hdr) {'Yes'} else {'NO'} | |
| if ($Imex -lt 0 -or $Imex -gt 2) {$Imex = 1} | |
| $FullName = (Get-ChildItem $Path).FullName | |
| if ($ConOptions -and $ConOptions -notmatch ';$') {$ConOptions = "$ConOptions;"} | |
| $constring = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=$FullName;Extended Properties='Excel 12.0 Xml;HDR=$hdrflag;IMEX=$Imex;Persist Security Info=False;$ConOptions'" | |
| # Get all sheet names | |
| if ($List) { | |
| return @(. $sheetlist $constring) | |
| } | |
| # Get all sheets | |
| if ($SheetName -eq '*' -or $SheetName.count -gt 1) { | |
| if ($Query -notmatch '%sheet%') { | |
| $Query = $Query -replace '\[.+\$(.*)\]','[%sheet%$$$1]' | |
| } | |
| $param = @{} + $PSBoundParameters | |
| $param.remove('SheetName') | |
| $param.remove('Property') | |
| $param['NoCheck'] = $true | |
| $param['Query'] = $Query | |
| . $sheetlist $constring | ForEach-Object { | |
| if ($SheetName -ne '*' -and $_ -notin $SheetName) {return} # continue | |
| @{ | |
| Name = $_ | |
| Data = Invoke-ExcelQuery -SheetName $_ @param | |
| } | |
| } | |
| return | |
| } | |
| # Adjust the query string | |
| if ($SheetName) { | |
| $shlist = . $sheetlist $constring | |
| if ($SheetName -in $shlist) { | |
| $Query = $Query.replace('%sheet%',$SheetName) | |
| } else { | |
| Write-Warning "The Excel workbook does not contain a sheet '$SheetName'." | |
| return | |
| } | |
| } elseif ($Query -match '%sheet%') { | |
| $shlist = . $sheetlist $constring | |
| $Query = $Query.replace('%sheet%',@($shlist)[0]) | |
| } else { | |
| $SheetName = if ($Query -match 'from \[(.+)\$\]') {$matches[1]} | |
| } | |
| # Getting Excel data | |
| $DataTable = [System.Data.DataTable]::new() | |
| $DataAdapter = [System.Data.OleDb.OleDbDataAdapter]::new($Query, $constring) | |
| $null = $DataAdapter.Fill($DataTable) | |
| $null = $DataAdapter.Dispose() | |
| $Properties = if ($Property) { | |
| $cols = @($DataTable.Columns.ColumnName) | |
| if ($Property -eq '*') { # add new columns | |
| $star = $Property.IndexOf('*') | |
| if ($star -eq 0) { | |
| $cols + ($Property -ne '*') | |
| } else { | |
| @($Property -ne '*') + $cols | |
| } | |
| } else { # property name converter | |
| for ($i=0; $i -lt $Property.count; $i++) { | |
| if ($cols[$i] -ne $null) { | |
| @{name = $Property[$i]; expression = [scriptblock]::Create("`$_.$($cols[$i])")} | |
| } else {$Property[$i]} | |
| } | |
| } | |
| } else {$DataTable.Columns.ColumnName} # default output structure | |
| $param = @{Property = $Properties} | |
| if ($Skip -gt 0) {$param['Skip'] = $Skip} | |
| $DataTable.Rows | Select-Object @param | |
| } # END Invoke-ExcelQuery | |
| # read the first sheet (in alphabetical order) | |
| # NOTE: if you use double quotes in query string you must insert backtick char before the dollar char | |
| # NOTE: %sheet% is a substitution alias for the sheet name | |
| Invoke-ExcelQuery -path <your_excel_file> -Query "select * from [%sheet%`$]" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment