外部から入手したファイルのファイル名に、Unicode文字クラスエスケープの \p{C} や \p{M} に一致する文字が含まれていて扱いに少し困った。
文字列を確認するため、PowerShell 5.1 で TextElementEnumerator.MoveNext() を回してテキスト要素ごとに文字を確認する使い捨てスクリプトを作成した。
確認する文字列が無い場合の例
👨👩👧👦
(() => {
    // Family emoji assembled from literal characters; every second entry is
    // the invisible U+200D (ZERO WIDTH JOINER).
    const literalParts = ["👨", "‍", "👩", "‍", "👧", "‍", "👦"];
    const str0 = literalParts.join("");

    // The same sequence assembled from raw UTF-16 code units.
    const codeUnits = [
        0xD83D, 0xDC68, 0x200D,
        0xD83D, 0xDC69, 0x200D,
        0xD83D, 0xDC67, 0x200D,
        0xD83D, 0xDC66,
    ];
    const str1 = String.fromCharCode(...codeUnits);

    // Appends <div><textarea></textarea></div> to the page body and returns
    // the new textarea.
    const addTextarea = () => {
        const wrapper = document.createElement("div");
        document.body.appendChild(wrapper);
        return wrapper.appendChild(document.createElement("textarea"));
    };

    // First textarea is filled through innerHTML, second through innerText.
    addTextarea().innerHTML = str0;
    addTextarea().innerText = str1;
})();
🤷♂️
(() => {
    // Shrug emoji assembled from literal characters: base emoji, U+200D
    // (ZERO WIDTH JOINER), male sign, U+FE0F (VARIATION SELECTOR-16).
    const literalParts = ["🤷", "‍", "♂", "️"];
    const str0 = literalParts.join("");

    // The same sequence assembled from raw UTF-16 code units.
    const codeUnits = [0xD83E, 0xDD37, 0x200D, 0x2642, 0xFE0F];
    const str1 = String.fromCharCode(...codeUnits);

    // Appends <div><textarea></textarea></div> to the page body and returns
    // the new textarea.
    const addTextarea = () => {
        const wrapper = document.createElement("div");
        document.body.appendChild(wrapper);
        return wrapper.appendChild(document.createElement("textarea"));
    };

    // First textarea is filled through innerHTML, second through innerText.
    addTextarea().innerHTML = str0;
    addTextarea().innerText = str1;
})();
ガ
(() => {
    // Katakana KA followed by U+3099 (combining voiced sound mark), written
    // as literal characters.
    const literalParts = ["カ", "゙"];
    const str0 = literalParts.join("");

    // The same sequence assembled from raw UTF-16 code units.
    const codeUnits = [0x30AB, 0x3099];
    const str1 = String.fromCharCode(...codeUnits);

    // Appends <div><textarea></textarea></div> to the page body and returns
    // the new textarea.
    const addTextarea = () => {
        const wrapper = document.createElement("div");
        document.body.appendChild(wrapper);
        return wrapper.appendChild(document.createElement("textarea"));
    };

    // First textarea is filled through innerHTML, second through innerText.
    addTextarea().innerHTML = str0;
    addTextarea().innerText = str1;
})();
カ゚
(() => {
    // Katakana KA followed by U+309A (combining semi-voiced sound mark),
    // written as literal characters.
    const literalParts = ["カ", "゚"];
    const str0 = literalParts.join("");

    // The same sequence assembled from raw UTF-16 code units.
    const codeUnits = [0x30AB, 0x309A];
    const str1 = String.fromCharCode(...codeUnits);

    // Appends <div><textarea></textarea></div> to the page body and returns
    // the new textarea.
    const addTextarea = () => {
        const wrapper = document.createElement("div");
        document.body.appendChild(wrapper);
        return wrapper.appendChild(document.createElement("textarea"));
    };

    // First textarea is filled through innerHTML, second through innerText.
    addTextarea().innerHTML = str0;
    addTextarea().innerText = str1;
})();
使い捨てスクリプト
# PowerShell 5.1, Windows 11 (written around June 2025)
#
# Throw-away WPF tool: type or paste text into the upper text box and the lower
# text box lists every text element returned by TextElementEnumerator together
# with the Unicode category and hex value of each UTF-16 code unit and an Excel
# UNICHAR() expression for each code point. When the window is closed, an Excel
# formula that rebuilds the last inspected text is copied to the clipboard.
$ErrorActionPreference = [System.Management.Automation.ActionPreference]::Stop
Set-StrictMode -Version Latest
Add-Type -AssemblyName PresentationFramework

# Input text box. The initial text deliberately embeds U+200B and U+3099 so
# there is something to inspect.
$textBoxIn = New-Object -TypeName System.Windows.Controls.TextBox -Property @{
    Text                          = "入力用" + [string]::new(@(0x200B)) + "テキストホ" + [string]::new(@(0x3099)) + "ックス"
    AcceptsReturn                 = $true
    FontFamily                    = "MS ゴシック"
    FontSize                      = 22
    Height                        = 150
    VerticalScrollBarVisibility   = [System.Windows.Controls.ScrollBarVisibility]::Visible
    HorizontalScrollBarVisibility = [System.Windows.Controls.ScrollBarVisibility]::Visible
}

# Output text box.
$textBoxOut = New-Object -TypeName System.Windows.Controls.TextBox -Property @{
    Text                          = "出力用テキストボックス"
    AcceptsReturn                 = $true
    FontFamily                    = "MS ゴシック"
    FontSize                      = 22
    VerticalScrollBarVisibility   = [System.Windows.Controls.ScrollBarVisibility]::Visible
    HorizontalScrollBarVisibility = [System.Windows.Controls.ScrollBarVisibility]::Visible
}

[System.Windows.Controls.DockPanel]::SetDock($textBoxIn, [System.Windows.Controls.Dock]::Top)
[System.Windows.Controls.DockPanel]::SetDock($textBoxOut, [System.Windows.Controls.Dock]::Bottom)

$dockPanel = New-Object -TypeName System.Windows.Controls.DockPanel
# Children.Add() returns the new index; discard it so it does not leak into
# the script's output.
$null = $dockPanel.Children.Add($textBoxIn)
$null = $dockPanel.Children.Add($textBoxOut)

$window = New-Object -TypeName System.Windows.Window
$window.Content = $dockPanel

# One row per text element of the most recently inspected input.
$psObjs = [System.Collections.Generic.List[PSObject]]::new()

$textChangedEventHandler = [System.Windows.Controls.TextChangedEventHandler] {
    param([object]$s, [System.Windows.Controls.TextChangedEventArgs]$e)

    $psObjs.Clear()

    # Build a TextElementEnumerator over the text currently in the sender.
    $tee = [System.Globalization.StringInfo]::GetTextElementEnumerator($s.Text)

    $scCat = [System.Collections.Specialized.StringCollection]::new()
    $scHex = [System.Collections.Specialized.StringCollection]::new()
    $scUni = [System.Collections.Specialized.StringCollection]::new()

    # MoveNext() drives the loop; $numTextElement is the 1-based row number.
    for ($numTextElement = 1; $tee.MoveNext(); $numTextElement++)
    {
        # Current text element (a string of one or more UTF-16 code units).
        $textElement = $tee.GetTextElement()
        $chars = $textElement.ToCharArray()

        $scCat.Clear()
        $scHex.Clear()
        $scUni.Clear()

        # Per UTF-16 code unit: UnicodeCategory name and hex value.
        for ($i = 0; $i -lt $chars.Length; $i++)
        {
            $null = $scCat.Add([System.Globalization.CharUnicodeInfo]::GetUnicodeCategory($chars[$i]))
            $null = $scHex.Add('0x{0:X4}' -f [uint16]$chars[$i])
        }

        # Per code point: Excel UNICHAR() expression.
        # ConvertToUtf32 throws when called at the index of a low surrogate, so
        # the index is advanced past the second half of every surrogate pair
        # instead of visiting each UTF-16 index. This keeps code points that
        # follow a surrogate pair inside the same text element (for example a
        # trailing U+FE0F) from being dropped.
        for ($i = 0; $i -lt $textElement.Length; $i++)
        {
            $isPair = [char]::IsSurrogatePair($textElement, $i)

            if ([char]::IsSurrogate($textElement, $i) -and -not $isPair)
            {
                # Unpaired surrogate: it has no code point, so report it and
                # carry on with the rest of the text element.
                Write-Verbose -Verbose -Message ('Unpaired surrogate 0x{0:X4} at index {1} of text element {2}; skipped.' -f [uint16]$textElement[$i], $i, $numTextElement)
                continue
            }

            $null = $scUni.Add('UNICHAR(HEX2DEC("{0:X4}"))' -f [char]::ConvertToUtf32($textElement, $i))

            if ($isPair)
            {
                # Skip the low surrogate that was just consumed.
                $i++
            }
        }

        $psObj = [PSCustomObject]@{
            Num             = $numTextElement
            UnicodeCategory = $scCat -join ', '
            UTF16           = '[string]::new(@({0}))' -f $($scHex -join ', ')
            Unicode         = $scUni -join ' & '
            TextElement     = $textElement
        }
        $psObjs.Add($psObj)
    }

    # Show the result table in the output text box, dropping blank lines.
    $textBoxOut.Text = [string[]]@($psObjs | Format-Table | Out-String -Width 2048 -Stream | Where-Object { $_ -ne "" }) -join "`n"
}

$textBoxIn.add_TextChanged($textChangedEventHandler)

# ShowDialog() blocks until the window is closed; its return value is not needed.
$null = $window.ShowDialog()

# For pasting into an Excel cell: a formula that rebuilds the inspected text.
# Rows are enumerated explicitly because "$psObjs.Unicode" on an empty list is
# a property-not-found error under strict mode, and rows without any code point
# are dropped so the formula never contains an empty operand.
$formulaTerms = @($psObjs | ForEach-Object { $_.Unicode } | Where-Object { $_ })
if ($formulaTerms.Count -gt 0)
{
    '=' + ($formulaTerms -join ' & ') | Set-Clipboard
}
出力例
Num | UnicodeCategory | UTF16 | Unicode | TextElement |
1 | OtherPunctuation | [string]::new(@(0x0027)) | UNICHAR(HEX2DEC("0027")) | ' |
2 | Format | [string]::new(@(0x200B)) | UNICHAR(HEX2DEC("200B")) | |
3 | OtherLetter | [string]::new(@(0x30D5)) | UNICHAR(HEX2DEC("30D5")) | フ |
4 | OtherLetter | [string]::new(@(0x30A1)) | UNICHAR(HEX2DEC("30A1")) | ァ |
5 | OtherLetter | [string]::new(@(0x30A4)) | UNICHAR(HEX2DEC("30A4")) | イ |
6 | OtherLetter | [string]::new(@(0x30EB)) | UNICHAR(HEX2DEC("30EB")) | ル |
7 | OtherLetter | [string]::new(@(0x540D)) | UNICHAR(HEX2DEC("540D")) | 名 |
8 | OtherPunctuation | [string]::new(@(0x0027)) | UNICHAR(HEX2DEC("0027")) | ' |