Search
 
SCRIPT & CODE EXAMPLE
 

HTML

convert html table to csv powershell

function ConvertFrom-HtmlTableRow {
    <#
    .SYNOPSIS
    Converts one parsed <tr> element into header names or a data object.

    .PARAMETER htmlTableRow
    A row object exposing its cells through a 'td' property (for example an
    XmlElement for <tr>). Accepted from the pipeline.

    .PARAMETER headers
    Column names used as property names for a data row. Cells with no
    matching (or an empty) header get a generated 'Column_NNNN' name.

    .PARAMETER isHeader
    When set, the row is treated as the header row and one trimmed column
    name per cell is emitted instead of a data object.

    .OUTPUTS
    With -isHeader: one string per cell. Otherwise: one PSObject per row
    with a NoteProperty per cell. A row with no cells emits nothing.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true, ValueFromPipeline = $true)]
        $htmlTableRow
        ,
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)]
        $headers
        ,
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)]
        [switch]$isHeader

    )
    process {
        # One template for generated names so header and data rows agree.
        $fallbackNameFormat = 'Column_{0:0000}'

        # Force an array: a single-cell row would otherwise yield a scalar
        # string, and indexing a string returns characters, not cells.
        $cols = @($htmlTableRow | Select-Object -ExpandProperty td)
        if ($cols.Count -eq 0) {
            # Nothing to convert; avoids the 0..-1 range that would visit
            # indices 0 and -1.
            return
        }

        if ($isHeader.IsPresent) {
            # Clean the headers to ensure each column has a usable name.
            for ($i = 0; $i -lt $cols.Count; $i++) {
                # Out-String appends a newline; trim so names are clean.
                $name = ($cols[$i] | Out-String).Trim()
                if ($name) { $name } else { $fallbackNameFormat -f $i }
            }
        } else {
            # $headers is optional; only real values are kept.
            $headerList = @($headers | Where-Object { $null -ne $_ })
            $result = New-Object -TypeName PSObject
            for ($i = 0; $i -lt $cols.Count; $i++) {
                # In case we have more columns than headers.
                if (($i -lt $headerList.Count) -and $headerList[$i]) {
                    $colName = $headerList[$i]
                } else {
                    $colName = $fallbackNameFormat -f $i
                }
                Add-Member -InputObject $result -MemberType NoteProperty -Name $colName -Value $cols[$i]
            }
            Write-Output $result
        }
    }
}

function ConvertFrom-HtmlTable {
    <#
    .SYNOPSIS
    Converts an HTML table element into one object per data row.

    .DESCRIPTION
    Reads the element's innerHTML, strips every tag except tr/td/th
    (dropping their attributes), renames th to td, parses the result as XML
    and passes each row to ConvertFrom-HtmlTableRow. The first row supplies
    the column names; the remaining rows become objects.

    Currently only a very basic <table><tr><td>...</td></tr></table>
    structure is supported; nested tables and similar are not understood.

    .PARAMETER htmlTable
    An object exposing an 'innerHTML' property. Accepted from the pipeline.

    .OUTPUTS
    One object per data row, limited to the header columns.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true, ValueFromPipeline = $true)]
        $htmlTable
    )
    process {
        # Keep only tr/td/th tags, without attributes; drop all other tags.
        # \b stops <thead>/<track> from being mistaken for <th>/<tr> and
        # truncated instead of removed. The lazy [^>]*? lets the second
        # group keep the '/' of a self-closing cell so the XML stays valid.
        $keepRowAndCellTags = '(</?t[rdh])\b[^>]*?(/?>)|(?:<[^>]*>)'
        # Treat header cells as ordinary cells to simplify row parsing.
        $headerCellTags = '(</?)(?:th)([^>]*/?>)'

        $cleanedMarkup = $htmlTable |
            Select-Object -ExpandProperty innerHTML |
            ForEach-Object {
                (($_ | Out-String) -replace $keepRowAndCellTags, '$1$2') -replace $headerCellTags, '$1td$2'
            }

        # The inline DTD declares &nbsp; so the XML parser accepts it.
        [xml]$cleanedHtml = "<!DOCTYPE doctypeName [<!ENTITY nbsp ' '>]><root>{0}</root>" -f $cleanedMarkup

        [string[]]$headers = @($cleanedHtml.root.tr | Select-Object -First 1 | ConvertFrom-HtmlTableRow -isHeader)
        if ($headers.Count -gt 0) {
            # NOTE(review): Select-Object treats names as wildcard patterns;
            # headers containing [ ] * ? may not select as expected - confirm.
            $cleanedHtml.root.tr |
                Select-Object -Skip 1 |
                ConvertFrom-HtmlTableRow -Headers $headers |
                Select-Object $headers
        }
    }
}

# Demo: download a Wikipedia page and emit every table on it as objects.
Clear-Host

[System.Uri]$url = 'https://en.wikipedia.org/wiki/List_of_countries_by_carbon_dioxide_emissions'

# NOTE(review): ParsedHtml is presumably only populated by Windows PowerShell
# (it relies on the Internet Explorer engine) - confirm before running elsewhere.
$response = Invoke-WebRequest -Uri $url

# Each <table> element is converted independently by ConvertFrom-HtmlTable.
$response.ParsedHtml.getElementsByTagName('table') | ConvertFrom-HtmlTable
Comment

PREVIOUS NEXT
Code Example
Html :: how to make the ignore part html like in ## 
Html :: button to hide and show div 
Html :: readhtml tags in python 
Html :: react router preventing assets from being served 
Html :: como hacer un boton flotante de volver arriba en html 
Html :: lorem long text 
Html :: filetype: env"DB_PASSWORDS" 
Html :: <div jsf:rendered include 
Html :: disable the hashtag to go to top 
Html :: code html view mobile inspect link head metalink 
Html :: tripadvisor button 
Html :: HTML List as 2 boxes 
Html :: what is class attribute in form 
Html :: Elements can be nested inside <a Tag 
Html :: center_div 
Html :: servicenow g: breakpoint 
Html :: mask 
Html :: partial url example 
Html :: falling star animation code 
Html :: how to set right to left font in html 
Html :: nio aandelen 
Html :: show html in laravel template 
Html :: =rept sheet 
Html :: hover message html 
Html :: 50 time-saving keyboard shortcuts in Excel for Windows 
Html :: how to toggle hover display text of button on click 
Html :: Cannot play media. No decoders for requested formats: text/html 
Html :: simplexml load file character encoding url 
Html :: An error occurred while retrieving token. DOMException: Registration failed - push service error 
Html :: bootstrap loader in os 
ADD CONTENT
Topic
Content
Source link
Name
2+3 =