xlogI125’s blog

パソコン作業を効率化したい

'​ファイル名' -match '\p{C}'

外部から入手したファイルのファイル名に、Unicode文字クラスエスケープの \p{C} や \p{M} に一致する文字が含まれていて、扱いに少し困った。

文字列の中身を確認するため、PowerShell 5.1 で TextElementEnumerator.MoveNext() を回して1文字(テキスト要素)ずつ調べる使い捨てスクリプトを作成した。

確認する文字列が無い場合の例

👨‍👩‍👧‍👦

(() => {
  // Same string built two ways: from literal characters (str0) and from
  // explicit UTF-16 code units (str1).
  const str0 = ["👨", "‍", "👩", "‍", "👧", "‍", "👦"].join("");
  const str1 = String.fromCharCode(
    0xD83D, 0xDC68, 0x200D,
    0xD83D, 0xDC69, 0x200D,
    0xD83D, 0xDC67, 0x200D,
    0xD83D, 0xDC66
  );

  // Appends <div><textarea></textarea></div> to the body and returns the textarea.
  const addTextarea = () => {
    const wrapper = document.body.appendChild(document.createElement("div"));
    return wrapper.appendChild(document.createElement("textarea"));
  };

  // One textarea is filled through innerHTML, the other through innerText.
  addTextarea().innerHTML = str0;
  addTextarea().innerText = str1;
})();

🤷‍♂️

(() => {
  // Same string built two ways: from literal characters (str0) and from
  // explicit UTF-16 code units (str1).
  const str0 = ["🤷", "‍", "♂", "️"].join("");
  const str1 = String.fromCharCode(0xD83E, 0xDD37, 0x200D, 0x2642, 0xFE0F);

  // Appends <div><textarea></textarea></div> to the body and returns the textarea.
  const addTextarea = () => {
    const wrapper = document.body.appendChild(document.createElement("div"));
    return wrapper.appendChild(document.createElement("textarea"));
  };

  // One textarea is filled through innerHTML, the other through innerText.
  addTextarea().innerHTML = str0;
  addTextarea().innerText = str1;
})();

ガ

(() => {
  // Same string built two ways: from literal characters (str0) and from
  // explicit UTF-16 code units 0x30AB + 0x3099 (str1).
  const str0 = ["カ", "゙"].join("");
  const str1 = String.fromCharCode(0x30AB, 0x3099);

  // Appends <div><textarea></textarea></div> to the body and returns the textarea.
  const addTextarea = () => {
    const wrapper = document.body.appendChild(document.createElement("div"));
    return wrapper.appendChild(document.createElement("textarea"));
  };

  // One textarea is filled through innerHTML, the other through innerText.
  addTextarea().innerHTML = str0;
  addTextarea().innerText = str1;
})();

カ゚

(() => {
  // Same string built two ways: from literal characters (str0) and from
  // explicit UTF-16 code units 0x30AB + 0x309A (str1).
  const str0 = ["カ", "゚"].join("");
  const str1 = String.fromCharCode(0x30AB, 0x309A);

  // Appends <div><textarea></textarea></div> to the body and returns the textarea.
  const addTextarea = () => {
    const wrapper = document.body.appendChild(document.createElement("div"));
    return wrapper.appendChild(document.createElement("textarea"));
  };

  // One textarea is filled through innerHTML, the other through innerText.
  addTextarea().innerHTML = str0;
  addTextarea().innerText = str1;
})();

使い捨てスクリプト

# PowerShell 5.1, Windows 11 (around June 2025)

# Strict mode plus terminating errors, then load WPF.
Set-StrictMode -Version Latest
$ErrorActionPreference = [System.Management.Automation.ActionPreference]::Stop
Add-Type -AssemblyName PresentationFramework

# Input text box. Its initial text deliberately embeds U+200B and U+3099 so
# there is something to inspect right away.
$textBoxIn = [System.Windows.Controls.TextBox]::new()
$textBoxIn.Text = -join @(
  "入力用"
  [string]::new(@(0x200B))
  "テキストホ"
  [string]::new(@(0x3099))
  "ックス"
)
$textBoxIn.AcceptsReturn = $true
$textBoxIn.FontFamily = "MS ゴシック"
$textBoxIn.FontSize = 22
$textBoxIn.Height = 150
$textBoxIn.VerticalScrollBarVisibility = [System.Windows.Controls.ScrollBarVisibility]::Visible
$textBoxIn.HorizontalScrollBarVisibility = [System.Windows.Controls.ScrollBarVisibility]::Visible

# Output text box (no explicit height is set on this one).
$textBoxOut = [System.Windows.Controls.TextBox]::new()
$textBoxOut.Text = "出力用テキストボックス"
$textBoxOut.AcceptsReturn = $true
$textBoxOut.FontFamily = "MS ゴシック"
$textBoxOut.FontSize = 22
$textBoxOut.VerticalScrollBarVisibility = [System.Windows.Controls.ScrollBarVisibility]::Visible
$textBoxOut.HorizontalScrollBarVisibility = [System.Windows.Controls.ScrollBarVisibility]::Visible

# Lay the two text boxes out in a DockPanel: input docked to the top,
# output docked to the bottom.
[System.Windows.Controls.DockPanel]::SetDock($textBoxIn, [System.Windows.Controls.Dock]::Top)
[System.Windows.Controls.DockPanel]::SetDock($textBoxOut, [System.Windows.Controls.Dock]::Bottom)

$dockPanel = New-Object -TypeName System.Windows.Controls.DockPanel
# Children.Add returns the index of the added child; discard it so stray
# integers are not written to the script's output.
$null = $dockPanel.Children.Add($textBoxIn)
$null = $dockPanel.Children.Add($textBoxOut)

# Host the panel in a window.
$window = New-Object -TypeName System.Windows.Window
$window.Content = $dockPanel

# One row per text element of the most recent input. Declared outside the
# handler so the rows can still be read after the window has been closed.
$psObjs = [System.Collections.Generic.List[PSObject]]::new()

# TextChanged handler: splits the sender's text into text elements and, for
# each element, records its index, the Unicode category and hex value of every
# UTF-16 code unit, and an Excel UNICHAR(...) expression for every code point.
# The resulting table is written to $textBoxOut.
$textChangedEventHandler = [System.Windows.Controls.TextChangedEventHandler] {
  param([object]$s, [System.Windows.Controls.TextChangedEventArgs]$e)
  $psObjs.Clear()
  # Create a TextElementEnumerator from the text typed into the text box
  $tee = [System.Globalization.StringInfo]::GetTextElementEnumerator($s.Text)
  $scCat = [System.Collections.Specialized.StringCollection]::new()
  $scHex = [System.Collections.Specialized.StringCollection]::new()
  $scUni = [System.Collections.Specialized.StringCollection]::new()
  # MoveNext drives the loop; $numTextElement is the 1-based element index
  for ($numTextElement = 1; $tee.MoveNext(); $numTextElement++) {
    # Get the current text element (a string)
    $textElement = $tee.GetTextElement()
    $chars = $textElement.ToCharArray()
    $scCat.Clear()
    $scHex.Clear()
    $scUni.Clear()
    for ($i = 0; $i -lt $chars.Length; $i++) {
      # Add the UnicodeCategory name of this UTF-16 code unit
      $null = $scCat.Add([System.Globalization.CharUnicodeInfo]::GetUnicodeCategory($chars[$i]))
      # Add the hexadecimal representation of this UTF-16 code unit
      $null = $scHex.Add('0x{0:X4}' -f [uint16]$chars[$i])
    }
    # Walk the element by code point rather than by code unit. Calling
    # ConvertToUtf32 on the low half of a surrogate pair throws, which used to
    # abort the loop and drop every code point that followed the pair.
    for ($i = 0; $i -lt $textElement.Length; $i++) {
      if ([char]::IsSurrogatePair($textElement, $i)) {
        $codePoint = [char]::ConvertToUtf32($textElement, $i)
        # Skip the low surrogate that was just consumed
        $i++
      }
      elseif ([char]::IsSurrogate($textElement, $i)) {
        # An unpaired surrogate has no code point; report it and move on
        ('Unpaired surrogate 0x{0:X4} in text element {1}' -f ([uint16]$textElement[$i]), $numTextElement) | Write-Verbose -Verbose
        continue
      }
      else {
        $codePoint = [int]$textElement[$i]
      }
      $null = $scUni.Add('UNICHAR(HEX2DEC("{0:X4}"))' -f $codePoint)
    }
    $psObj = [PSCustomObject]@{
      Num             = $numTextElement
      UnicodeCategory = $scCat -join ', '
      UTF16           = '[string]::new(@({0}))' -f $($scHex -join ', ')
      Unicode         = $scUni -join ' & '
      TextElement     = $textElement
    }
    $psObjs.Add($psObj)
  }
  # Show the result table in the output text box (blank lines removed)
  $textBoxOut.Text = [string[]]@($psObjs | Format-Table | Out-String -Width 2048 -Stream | Where-Object { $_ -ne "" }) -join "`n"
}

$textBoxIn.add_TextChanged($textChangedEventHandler)

# Show the window modally. The dialog result is not used, so discard it
# instead of letting it fall into the script's output.
$null = $window.ShowDialog()


# For pasting into an Excel cell.
# The rows are enumerated explicitly: with strict mode on, "$psObjs.Unicode"
# fails when the list is empty (window closed without editing the text).
# Empty fragments are dropped so the joined formula never contains "&  &".
$unicodeParts = @($psObjs | ForEach-Object { $_.Unicode } | Where-Object { $_ })
if ($unicodeParts.Count -gt 0) {
  '=' + ($unicodeParts -join ' & ') | Set-Clipboard
}

出力例

Num UnicodeCategory UTF16 Unicode TextElement
1 OtherPunctuation [string]::new(@(0x0027)) UNICHAR(HEX2DEC("0027")) '
2 Format [string]::new(@(0x200B)) UNICHAR(HEX2DEC("200B")) ​
3 OtherLetter [string]::new(@(0x30D5)) UNICHAR(HEX2DEC("30D5")) フ
4 OtherLetter [string]::new(@(0x30A1)) UNICHAR(HEX2DEC("30A1")) ァ
5 OtherLetter [string]::new(@(0x30A4)) UNICHAR(HEX2DEC("30A4")) イ
6 OtherLetter [string]::new(@(0x30EB)) UNICHAR(HEX2DEC("30EB")) ル
7 OtherLetter [string]::new(@(0x540D)) UNICHAR(HEX2DEC("540D")) 名
8 OtherPunctuation [string]::new(@(0x0027)) UNICHAR(HEX2DEC("0027")) '

PowerShell 5.1 で Windows.Data.Pdf.PdfDocument.PageCount を取得 2

他人から入手したPDFファイルに対してPdfDocument.LoadFromStreamAsyncメソッドを試すと、うまく読み取れないPDFファイルが意外と多い。

# PowerShell 5.1, Windows 11 (around May 2025)
Set-StrictMode -Version Latest

Add-Type -AssemblyName PresentationFramework, System.Runtime.WindowsRuntime
# Reference the WinRT type once; the resulting type object itself is not needed.
$null = [Windows.Data.Pdf.PdfDocument, Windows.Data, ContentType = WindowsRuntime]

# AsTask: pick the overload taking (IAsyncOperation`1, CancellationToken)
# and close it over PdfDocument.
$methodInfo_AsTask_PdfDoc_IAsyncOp_CancelToken = $(
  foreach ($candidate in [System.WindowsRuntimeSystemExtensions].GetMethods()) {
    if ($candidate.Name -ne "AsTask") { continue }
    $parameters = $candidate.GetParameters()
    if ($parameters.Length -ne 2) { continue }
    if ($parameters[0].ParameterType.Name -ne 'IAsyncOperation`1') { continue }
    if ($parameters[1].ParameterType.Name -ne 'CancellationToken') { continue }
    $candidate
  }
).MakeGenericMethod(@([Windows.Data.Pdf.PdfDocument]))

# Ask the user for a PDF file; $fPath receives nothing when the dialog
# does not return a true result.
$fPath = & {
  $dialog = New-Object -TypeName Microsoft.Win32.OpenFileDialog
  $dialog.Filter = "PDF File|*.pdf"
  $dialog.Multiselect = $false
  if (-not $dialog.ShowDialog()) {
    return
  }
  $dialog.FileNames[0]
}

# Load the selected PDF through Windows.Data.Pdf and print the document's
# properties. Every resource is released in the finally block, including when
# opening or loading fails.
if ([string]::IsNullOrEmpty($fPath)) {
  # The file dialog was cancelled, so there is nothing to open.
  "No PDF file was selected." | Write-Host -ForegroundColor Red
}
else {
  $fStream = $null
  $raStream = $null
  $cancelTokenSrc = $null
  try {
    # Open read-only while letting other processes read, write or delete the file
    $fStream = [System.IO.File]::Open($fPath, [System.IO.FileMode]::Open, [System.IO.FileAccess]::Read, [System.IO.FileShare]::ReadWrite -bor [System.IO.FileShare]::Delete)
    $raStream = [System.IO.WindowsRuntimeStreamExtensions]::AsRandomAccessStream($fStream)

    # PdfDocument.LoadFromStreamAsync method (overload that takes a password)
    $iAsyncOp_PdfDoc = [Windows.Data.Pdf.PdfDocument]::LoadFromStreamAsync($raStream, "password")

    $cancelTokenSrc = [System.Threading.CancellationTokenSource]::new()

    # AsTask: wrap the WinRT async operation in a Task<PdfDocument>
    $task_PdfDoc = $methodInfo_AsTask_PdfDoc_IAsyncOp_CancelToken.Invoke($null, @($iAsyncOp_PdfDoc, $cancelTokenSrc.Token))

    # Wait at most 15 seconds for the document to load
    if ($task_PdfDoc.Wait(15000)) {
      $pdfDoc = $task_PdfDoc.Result
      # Show the page count and the other properties of the PDF document
      $pdfDoc | Format-Table | Out-String -Width 80 -Stream | Where-Object { $_ -ne "" } | Write-Host -ForegroundColor Yellow
    }
    else {
      $cancelTokenSrc.Cancel()
      "Loading the PDF did not finish within 15 seconds; the operation was cancelled." | Write-Host -ForegroundColor Red
    }
  }
  catch {
    $_ | Write-Host -ForegroundColor Red
    if ($_.Exception.InnerException -is [System.AggregateException]) {
      "System.AggregateException" | Write-Host -ForegroundColor Yellow
      $_.Exception.InnerException | Format-List -Force | Out-String -Width 80 -Stream | Where-Object { $_ -ne "" } | Write-Host -ForegroundColor Yellow
    }
  }
  finally {
    # Release in reverse order of creation; skip anything that was never created
    if ($null -ne $cancelTokenSrc) { $cancelTokenSrc.Dispose() }
    if ($null -ne $raStream) { $raStream.Dispose() }
    if ($null -ne $fStream) { $fStream.Dispose() }
  }
}

PowerShell 5.1 で Windows.Data.Pdf.PdfDocument.PageCount を取得

[Windows.Data.Pdf.PdfDocument]が見つかるかの確認

# PowerShell 5.1, Windows 11 (around April 2025)

# Assembly-qualified name of the WinRT type to look up.
$str = 'Windows.Data.Pdf.PdfDocument, Windows.Data, Version=255.255.255.255, Culture=neutral, PublicKeyToken=null, ContentType=WindowsRuntime'
# Resolve the name and print the assembly-qualified name of what was found.
$resolvedType = [type]::GetType($str)
$resolvedType.AssemblyQualifiedName | Write-Host -ForegroundColor DarkYellow
#=> Windows.Data.Pdf.PdfDocument, Windows.Data, Version=255.255.255.255, Culture=neutral, PublicKeyToken=null, ContentType=WindowsRuntime


WindowsRuntimeSystemExtensions.AsTaskメソッドに関連して[System.WindowsRuntimeSystemExtensions].GetMethods()の確認

# PowerShell 5.1, Windows 11 (around April 2025)

Set-StrictMode -Version Latest
Add-Type -AssemblyName System.Runtime.WindowsRuntime

$mInfos = [System.WindowsRuntimeSystemExtensions].GetMethods()

# Largest parameter count seen; decides how many ParameterTypeNN columns to show.
$numParamsMax = 0

# One row per method: name, return type, parameter count, and one
# ParameterTypeNN property per parameter.
$psObjs = foreach ($method in $mInfos) {
    $pInfos = $method.GetParameters()
    $row = [ordered]@{
        Name       = $method.Name
        ReturnType = $method.ReturnType.ToString()
        NumParams  = $pInfos.Length
    }
    for ($k = 0; $k -lt $pInfos.Length; $k++) {
        $row["ParameterType" + $k.ToString("00")] = $pInfos[$k].ParameterType.Name
    }
    if ($pInfos.Length -gt $numParamsMax) {
        $numParamsMax = $pInfos.Length
    }
    [PSCustomObject]$row
}

# Column names ParameterType00 .. ParameterType(NN-1)
$propNamesParameterType = [string[]]@(
    for ($i = 0; $i -lt $numParamsMax; $i++) {
        "ParameterType" + $i.ToString("00")
    }
)

$psObjs | Select-Object -Property $(@("Name", "ReturnType", "NumParams") + $propNamesParameterType) | Out-GridView


PDFのページ数を取得

# PowerShell 5.1, Windows 11 (around April 2025)
#
# Prints the page count of test.pdf on the current user's desktop using
# Windows.Data.Pdf.PdfDocument.

$ErrorActionPreference = [System.Management.Automation.ActionPreference]::Stop
$VerbosePreference = [System.Management.Automation.ActionPreference]::Continue
Set-StrictMode -Version Latest

# Load the WinRT interop assembly once and reference the PdfDocument type.
Add-Type -AssemblyName System.Runtime.WindowsRuntime
$null = [Windows.Data.Pdf.PdfDocument, Windows.Data, ContentType = WindowsRuntime]

$filePath = "${env:USERPROFILE}\Desktop\test.pdf"

# Errors are terminating here, so the streams are released in finally;
# otherwise a failure would leave the file handle open.
$fStream = $null
$raStream = $null
try {
    $fStream = [System.IO.File]::OpenRead($filePath)
    $raStream = [System.IO.WindowsRuntimeStreamExtensions]::AsRandomAccessStream($fStream)

    $iAsyncOpPdfDoc = [Windows.Data.Pdf.PdfDocument]::LoadFromStreamAsync($raStream)

    # Select the methods named AsTask
    $miAsTasks = [System.Reflection.MethodInfo[]]@(
        [System.WindowsRuntimeSystemExtensions].GetMethods() | Where-Object { $_.Name -eq "AsTask" }
    )

    # Select the overload whose only parameter is IAsyncOperation<TResult>
    $miAsTaskIAsyncOpGen = [System.Reflection.MethodInfo](
        $miAsTasks | Where-Object {
            $paramInfos = $_.GetParameters()
            if ($paramInfos.Length -eq 1) {
                $paramInfos[0].ParameterType.Name -eq 'IAsyncOperation`1'
            }
        }
    )

    # Check the return type and parameters (the verbose prefix is localized)
    $miAsTaskIAsyncOpGen.ReturnType.ToString() | Write-Verbose
    #=> VERBOSE: System.Threading.Tasks.Task`1[TResult]
    $miAsTaskIAsyncOpGen.GetParameters() | ForEach-Object { $_.ParameterType.ToString() | Write-Verbose }
    #=> VERBOSE: Windows.Foundation.IAsyncOperation`1[TResult]

    # MakeGenericMethod: close the generic method over PdfDocument
    $miAsTaskIAsyncOpPdfDoc = $miAsTaskIAsyncOpGen.MakeGenericMethod([System.Type[]]@([Windows.Data.Pdf.PdfDocument]))

    # Check the return type and parameters again
    $miAsTaskIAsyncOpPdfDoc.ReturnType.ToString() | Write-Verbose
    #=> VERBOSE: System.Threading.Tasks.Task`1[Windows.Data.Pdf.PdfDocument]
    $miAsTaskIAsyncOpPdfDoc.GetParameters() | ForEach-Object { $_.ParameterType.ToString() | Write-Verbose }
    #=> VERBOSE: Windows.Foundation.IAsyncOperation`1[Windows.Data.Pdf.PdfDocument]

    # Get the Task<PdfDocument>
    $taskPdfDoc = $miAsTaskIAsyncOpPdfDoc.Invoke($null, @($iAsyncOpPdfDoc))

    # Block until loading has finished
    $taskPdfDoc.Wait()

    # Task<PdfDocument>.Result
    $pdfDoc = $taskPdfDoc.Result

    # Page count of the PDF
    $pdfDoc.PageCount
}
finally {
    # Release in reverse order of creation; skip anything that was never created
    if ($null -ne $raStream) { $raStream.Dispose() }
    if ($null -ne $fStream) { $fStream.Dispose() }
}