made much better

2024-03-08 16:14:04 -06:00 · 2024-03-08 16:14:04 -06:00 · 39374944b7
commit 39374944b7
parent 206d4840ba
5 changed files with 681 additions and 29 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,3 @@
 /target
 *.txt
 *.sh
--- a/README.md
+++ b/README.md
@ -1,4 +1,7 @@
-Generate a file containing every single UTF8 codepoint, its glyph, and its name.
+Prints Unicode characters to stdout.
-First do `cargo run`, then `sh filter_and_format.sh`. Your final file will be
+Specify which Unicode blocks you want by uncommenting them in `src/blocks.rs`.
-called `formatted.txt`. Do what you want with it.
+Do `cargo run` and the codepoint, glyph, and name of every character in the
 specified blocks will be printed to stdout, separated by a tab `\t`.
 That's it. Enjoy
--- a/filter_and_format.sh
+++ b/filter_and_format.sh
@ -1,3 +0,0 @@
 perl -ne 'if (/Private Use/i) {next;} else {print;}' all_chars.txt > filtered.txt
 perl -F'\t' -ae 'chomp @F; print("$F[0]  $F[1]  ", ($F[2] =~ s/\b(.)(.*?)\b/\U$1\L$2/gr), "\n")' filtered.txt > formatted.txt
--- a/src/blocks.rs
+++ b/src/blocks.rs
@ -0,0 +1,658 @@
 /// Unicode blocks whose characters are selected for printing.
 ///
 /// Each entry is a `(first, last)` pair of code points delimiting one block; the
 /// comment directly above an entry gives that block's name. Both bounds are
 /// inclusive: `last` is the final code point of the block (e.g. `0x007F` for
 /// Basic Latin, with Latin-1 Supplement starting at `0x0080`), so a consumer must
 /// iterate `first..=last` to cover the whole block.
 ///
 /// Entries that are commented out are disabled. Uncomment an entry to include
 /// that block, or comment one out to exclude it.
 pub const BLOCKS: &[(u32, u32)] = &[
    // Basic Latin
    (0x0000, 0x007F),
    // Latin-1 Supplement
    (0x0080, 0x00FF),
    // Latin Extended-A
    (0x0100, 0x017F),
    // Latin Extended-B
    (0x0180, 0x024F),
    // IPA Extensions
    (0x0250, 0x02AF),
    // Spacing Modifier Letters
    //(0x02B0, 0x02FF),
    // Combining Diacritical Marks
    //(0x0300, 0x036F),
    // Greek and Coptic
    (0x0370, 0x03FF),
    // Cyrillic
    (0x0400, 0x04FF),
    // Cyrillic Supplement
    (0x0500, 0x052F),
    // Armenian
    //(0x0530, 0x058F),
    // Hebrew
    //(0x0590, 0x05FF),
    // Arabic
    //(0x0600, 0x06FF),
    // Syriac
    //(0x0700, 0x074F),
    // Arabic Supplement
    //(0x0750, 0x077F),
    // Thaana
    //(0x0780, 0x07BF),
    // NKo
    //(0x07C0, 0x07FF),
    // Samaritan
    //(0x0800, 0x083F),
    // Mandaic
    //(0x0840, 0x085F),
    // Syriac Supplement
    //(0x0860, 0x086F),
    // Arabic Extended-B
    //(0x0870, 0x089F),
    // Arabic Extended-A
    //(0x08A0, 0x08FF),
    // Devanagari
    //(0x0900, 0x097F),
    // Bengali
    //(0x0980, 0x09FF),
    // Gurmukhi
    //(0x0A00, 0x0A7F),
    // Gujarati
    //(0x0A80, 0x0AFF),
    // Oriya
    //(0x0B00, 0x0B7F),
    // Tamil
    //(0x0B80, 0x0BFF),
    // Telugu
    //(0x0C00, 0x0C7F),
    // Kannada
    //(0x0C80, 0x0CFF),
    // Malayalam
    //(0x0D00, 0x0D7F),
    // Sinhala
    //(0x0D80, 0x0DFF),
    // Thai
    //(0x0E00, 0x0E7F),
    // Lao
    //(0x0E80, 0x0EFF),
    // Tibetan
    //(0x0F00, 0x0FFF),
    // Myanmar
    //(0x1000, 0x109F),
    // Georgian
    //(0x10A0, 0x10FF),
    // Hangul Jamo
    //(0x1100, 0x11FF),
    // Ethiopic
    //(0x1200, 0x137F),
    // Ethiopic Supplement
    //(0x1380, 0x139F),
    // Cherokee
    //(0x13A0, 0x13FF),
    // Unified Canadian Aboriginal Syllabics
    //(0x1400, 0x167F),
    // Ogham
    //(0x1680, 0x169F),
    // Runic
    //(0x16A0, 0x16FF),
    // Tagalog
    //(0x1700, 0x171F),
    // Hanunoo
    //(0x1720, 0x173F),
    // Buhid
    //(0x1740, 0x175F),
    // Tagbanwa
    //(0x1760, 0x177F),
    // Khmer
    //(0x1780, 0x17FF),
    // Mongolian
    //(0x1800, 0x18AF),
    // Unified Canadian Aboriginal Syllabics Extended
    //(0x18B0, 0x18FF),
    // Limbu
    //(0x1900, 0x194F),
    // Tai Le
    //(0x1950, 0x197F),
    // New Tai Lue
    //(0x1980, 0x19DF),
    // Khmer Symbols
    //(0x19E0, 0x19FF),
    // Buginese
    //(0x1A00, 0x1A1F),
    // Tai Tham
    //(0x1A20, 0x1AAF),
    // Combining Diacritical Marks Extended
    //(0x1AB0, 0x1AFF),
    // Balinese
    //(0x1B00, 0x1B7F),
    // Sundanese
    //(0x1B80, 0x1BBF),
    // Batak
    //(0x1BC0, 0x1BFF),
    // Lepcha
    //(0x1C00, 0x1C4F),
    // Ol Chiki
    //(0x1C50, 0x1C7F),
    // Cyrillic Extended-C
    //(0x1C80, 0x1C8F),
    // Georgian Extended
    //(0x1C90, 0x1CBF),
    // Sundanese Supplement
    //(0x1CC0, 0x1CCF),
    // Vedic Extensions
    //(0x1CD0, 0x1CFF),
    // Phonetic Extensions
    //(0x1D00, 0x1D7F),
    // Phonetic Extensions Supplement
    //(0x1D80, 0x1DBF),
    // Combining Diacritical Marks Supplement
    //(0x1DC0, 0x1DFF),
    // Latin Extended Additional
    (0x1E00, 0x1EFF),
    // Greek Extended
    (0x1F00, 0x1FFF),
    // General Punctuation
    (0x2000, 0x206F),
    // Superscripts and Subscripts
    (0x2070, 0x209F),
    // Currency Symbols
    (0x20A0, 0x20CF),
    // Combining Diacritical Marks for Symbols
    (0x20D0, 0x20FF),
    // Letterlike Symbols
    (0x2100, 0x214F),
    // Number Forms
    //(0x2150, 0x218F),
    // Arrows
    (0x2190, 0x21FF),
    // Mathematical Operators
    (0x2200, 0x22FF),
    // Miscellaneous Technical
    (0x2300, 0x23FF),
    // Control Pictures
    (0x2400, 0x243F),
    // Optical Character Recognition
    //(0x2440, 0x245F),
    // Enclosed Alphanumerics
    //(0x2460, 0x24FF),
    // Box Drawing
    (0x2500, 0x257F),
    // Block Elements
    (0x2580, 0x259F),
    // Geometric Shapes
    (0x25A0, 0x25FF),
    // Miscellaneous Symbols
    (0x2600, 0x26FF),
    // Dingbats
    (0x2700, 0x27BF),
    // Miscellaneous Mathematical Symbols-A
    (0x27C0, 0x27EF),
    // Supplemental Arrows-A
    (0x27F0, 0x27FF),
    // Braille Patterns
    //(0x2800, 0x28FF),
    // Supplemental Arrows-B
    (0x2900, 0x297F),
    // Miscellaneous Mathematical Symbols-B
    (0x2980, 0x29FF),
    // Supplemental Mathematical Operators
    (0x2A00, 0x2AFF),
    // Miscellaneous Symbols and Arrows
    (0x2B00, 0x2BFF),
    // Glagolitic
    //(0x2C00, 0x2C5F),
    // Latin Extended-C
    (0x2C60, 0x2C7F),
    // Coptic
    //(0x2C80, 0x2CFF),
    // Georgian Supplement
    //(0x2D00, 0x2D2F),
    // Tifinagh
    //(0x2D30, 0x2D7F),
    // Ethiopic Extended
    //(0x2D80, 0x2DDF),
    // Cyrillic Extended-A
    (0x2DE0, 0x2DFF),
    // Supplemental Punctuation
    //(0x2E00, 0x2E7F),
    // CJK Radicals Supplement
    //(0x2E80, 0x2EFF),
    // Kangxi Radicals
    //(0x2F00, 0x2FDF),
    // Ideographic Description Characters
    //(0x2FF0, 0x2FFF),
    // CJK Symbols and Punctuation
    //(0x3000, 0x303F),
    // Hiragana
    //(0x3040, 0x309F),
    // Katakana
    //(0x30A0, 0x30FF),
    // Bopomofo
    //(0x3100, 0x312F),
    // Hangul Compatibility Jamo
    //(0x3130, 0x318F),
    // Kanbun
    //(0x3190, 0x319F),
    // Bopomofo Extended
    //(0x31A0, 0x31BF),
    // CJK Strokes
    //(0x31C0, 0x31EF),
    // Katakana Phonetic Extensions
    //(0x31F0, 0x31FF),
    // Enclosed CJK Letters and Months
    //(0x3200, 0x32FF),
    // CJK Compatibility
    //(0x3300, 0x33FF),
    // CJK Unified Ideographs Extension A
    //(0x3400, 0x4DBF),
    // Yijing Hexagram Symbols
    //(0x4DC0, 0x4DFF),
    // CJK Unified Ideographs
    //(0x4E00, 0x9FFF),
    // Yi Syllables
    //(0xA000, 0xA48F),
    // Yi Radicals
    //(0xA490, 0xA4CF),
    // Lisu
    //(0xA4D0, 0xA4FF),
    // Vai
    //(0xA500, 0xA63F),
    // Cyrillic Extended-B
    //(0xA640, 0xA69F),
    // Bamum
    //(0xA6A0, 0xA6FF),
    // Modifier Tone Letters
    //(0xA700, 0xA71F),
    // Latin Extended-D
    (0xA720, 0xA7FF),
    // Syloti Nagri
    //(0xA800, 0xA82F),
    // Common Indic Number Forms
    //(0xA830, 0xA83F),
    // Phags-pa
    //(0xA840, 0xA87F),
    // Saurashtra
    //(0xA880, 0xA8DF),
    // Devanagari Extended
    //(0xA8E0, 0xA8FF),
    // Kayah Li
    //(0xA900, 0xA92F),
    // Rejang
    //(0xA930, 0xA95F),
    // Hangul Jamo Extended-A
    //(0xA960, 0xA97F),
    // Javanese
    //(0xA980, 0xA9DF),
    // Myanmar Extended-B
    //(0xA9E0, 0xA9FF),
    // Cham
    //(0xAA00, 0xAA5F),
    // Myanmar Extended-A
    //(0xAA60, 0xAA7F),
    // Tai Viet
    //(0xAA80, 0xAADF),
    // Meetei Mayek Extensions
    //(0xAAE0, 0xAAFF),
    // Ethiopic Extended-A
    //(0xAB00, 0xAB2F),
    // Latin Extended-E
    //(0xAB30, 0xAB6F),
    // Cherokee Supplement
    //(0xAB70, 0xABBF),
    // Meetei Mayek
    //(0xABC0, 0xABFF),
    // Hangul Syllables
    //(0xAC00, 0xD7AF),
    // Hangul Jamo Extended-B
    //(0xD7B0, 0xD7FF),
    // High Surrogates
    //(0xD800, 0xDB7F),
    // High Private Use Surrogates
    //(0xDB80, 0xDBFF),
    // Low Surrogates
    //(0xDC00, 0xDFFF),
    // Private Use Area
    //(0xE000, 0xF8FF),
    // CJK Compatibility Ideographs
    //(0xF900, 0xFAFF),
    // Alphabetic Presentation Forms
    //(0xFB00, 0xFB4F),
    // Arabic Presentation Forms-A
    //(0xFB50, 0xFDFF),
    // Variation Selectors
    //(0xFE00, 0xFE0F),
    // Vertical Forms
    //(0xFE10, 0xFE1F),
    // Combining Half Marks
    //(0xFE20, 0xFE2F),
    // CJK Compatibility Forms
    //(0xFE30, 0xFE4F),
    // Small Form Variants
    //(0xFE50, 0xFE6F),
    // Arabic Presentation Forms-B
    //(0xFE70, 0xFEFF),
    // Halfwidth and Fullwidth Forms
    //(0xFF00, 0xFFEF),
    // Specials
    //(0xFFF0, 0xFFFF),
    // Linear B Syllabary
    //(0x10000, 0x1007F),
    // Linear B Ideograms
    //(0x10080, 0x100FF),
    // Aegean Numbers
    //(0x10100, 0x1013F),
    // Ancient Greek Numbers
    //(0x10140, 0x1018F),
    // Ancient Symbols
    //(0x10190, 0x101CF),
    // Phaistos Disc
    //(0x101D0, 0x101FF),
    // Lycian
    //(0x10280, 0x1029F),
    // Carian
    //(0x102A0, 0x102DF),
    // Coptic Epact Numbers
    //(0x102E0, 0x102FF),
    // Old Italic
    //(0x10300, 0x1032F),
    // Gothic
    //(0x10330, 0x1034F),
    // Old Permic
    //(0x10350, 0x1037F),
    // Ugaritic
    //(0x10380, 0x1039F),
    // Old Persian
    //(0x103A0, 0x103DF),
    // Deseret
    //(0x10400, 0x1044F),
    // Shavian
    //(0x10450, 0x1047F),
    // Osmanya
    //(0x10480, 0x104AF),
    // Osage
    //(0x104B0, 0x104FF),
    // Elbasan
    //(0x10500, 0x1052F),
    // Caucasian Albanian
    //(0x10530, 0x1056F),
    // Vithkuqi
    //(0x10570, 0x105BF),
    // Linear A
    //(0x10600, 0x1077F),
    // Latin Extended-F
    //(0x10780, 0x107BF),
    // Cypriot Syllabary
    //(0x10800, 0x1083F),
    // Imperial Aramaic
    //(0x10840, 0x1085F),
    // Palmyrene
    //(0x10860, 0x1087F),
    // Nabataean
    //(0x10880, 0x108AF),
    // Hatran
    //(0x108E0, 0x108FF),
    // Phoenician
    //(0x10900, 0x1091F),
    // Lydian
    //(0x10920, 0x1093F),
    // Meroitic Hieroglyphs
    //(0x10980, 0x1099F),
    // Meroitic Cursive
    //(0x109A0, 0x109FF),
    // Kharoshthi
    //(0x10A00, 0x10A5F),
    // Old South Arabian
    //(0x10A60, 0x10A7F),
    // Old North Arabian
    //(0x10A80, 0x10A9F),
    // Manichaean
    //(0x10AC0, 0x10AFF),
    // Avestan
    //(0x10B00, 0x10B3F),
    // Inscriptional Parthian
    //(0x10B40, 0x10B5F),
    // Inscriptional Pahlavi
    //(0x10B60, 0x10B7F),
    // Psalter Pahlavi
    //(0x10B80, 0x10BAF),
    // Old Turkic
    //(0x10C00, 0x10C4F),
    // Old Hungarian
    //(0x10C80, 0x10CFF),
    // Hanifi Rohingya
    //(0x10D00, 0x10D3F),
    // Rumi Numeral Symbols
    //(0x10E60, 0x10E7F),
    // Yezidi
    //(0x10E80, 0x10EBF),
    // Arabic Extended-C
    //(0x10EC0, 0x10EFF),
    // Old Sogdian
    //(0x10F00, 0x10F2F),
    // Sogdian
    //(0x10F30, 0x10F6F),
    // Old Uyghur
    //(0x10F70, 0x10FAF),
    // Chorasmian
    //(0x10FB0, 0x10FDF),
    // Elymaic
    //(0x10FE0, 0x10FFF),
    // Brahmi
    //(0x11000, 0x1107F),
    // Kaithi
    //(0x11080, 0x110CF),
    // Sora Sompeng
    //(0x110D0, 0x110FF),
    // Chakma
    //(0x11100, 0x1114F),
    // Mahajani
    //(0x11150, 0x1117F),
    // Sharada
    //(0x11180, 0x111DF),
    // Sinhala Archaic Numbers
    //(0x111E0, 0x111FF),
    // Khojki
    //(0x11200, 0x1124F),
    // Multani
    //(0x11280, 0x112AF),
    // Khudawadi
    //(0x112B0, 0x112FF),
    // Grantha
    //(0x11300, 0x1137F),
    // Newa
    //(0x11400, 0x1147F),
    // Tirhuta
    //(0x11480, 0x114DF),
    // Siddham
    //(0x11580, 0x115FF),
    // Modi
    //(0x11600, 0x1165F),
    // Mongolian Supplement
    //(0x11660, 0x1167F),
    // Takri
    //(0x11680, 0x116CF),
    // Ahom
    //(0x11700, 0x1174F),
    // Dogra
    //(0x11800, 0x1184F),
    // Warang Citi
    //(0x118A0, 0x118FF),
    // Dives Akuru
    //(0x11900, 0x1195F),
    // Nandinagari
    //(0x119A0, 0x119FF),
    // Zanabazar Square
    //(0x11A00, 0x11A4F),
    // Soyombo
    //(0x11A50, 0x11AAF),
    // Unified Canadian Aboriginal Syllabics Extended-A
    //(0x11AB0, 0x11ABF),
    // Pau Cin Hau
    //(0x11AC0, 0x11AFF),
    // Devanagari Extended-A
    //(0x11B00, 0x11B5F),
    // Bhaiksuki
    //(0x11C00, 0x11C6F),
    // Marchen
    //(0x11C70, 0x11CBF),
    // Masaram Gondi
    //(0x11D00, 0x11D5F),
    // Gunjala Gondi
    //(0x11D60, 0x11DAF),
    // Makasar
    //(0x11EE0, 0x11EFF),
    // Kawi
    //(0x11F00, 0x11F5F),
    // Lisu Supplement
    //(0x11FB0, 0x11FBF),
    // Tamil Supplement
    //(0x11FC0, 0x11FFF),
    // Cuneiform
    //(0x12000, 0x123FF),
    // Cuneiform Numbers and Punctuation
    //(0x12400, 0x1247F),
    // Early Dynastic Cuneiform
    //(0x12480, 0x1254F),
    // Cypro-Minoan
    //(0x12F90, 0x12FFF),
    // Egyptian Hieroglyphs
    //(0x13000, 0x1342F),
    // Egyptian Hieroglyph Format Controls
    //(0x13430, 0x1345F),
    // Anatolian Hieroglyphs
    //(0x14400, 0x1467F),
    // Bamum Supplement
    //(0x16800, 0x16A3F),
    // Mro
    //(0x16A40, 0x16A6F),
    // Tangsa
    //(0x16A70, 0x16ACF),
    // Bassa Vah
    //(0x16AD0, 0x16AFF),
    // Pahawh Hmong
    //(0x16B00, 0x16B8F),
    // Medefaidrin
    //(0x16E40, 0x16E9F),
    // Miao
    //(0x16F00, 0x16F9F),
    // Ideographic Symbols and Punctuation
    //(0x16FE0, 0x16FFF),
    // Tangut
    //(0x17000, 0x187FF),
    // Tangut Components
    //(0x18800, 0x18AFF),
    // Khitan Small Script
    //(0x18B00, 0x18CFF),
    // Tangut Supplement
    //(0x18D00, 0x18D7F),
    // Kana Extended-B
    //(0x1AFF0, 0x1AFFF),
    // Kana Supplement
    //(0x1B000, 0x1B0FF),
    // Kana Extended-A
    //(0x1B100, 0x1B12F),
    // Small Kana Extension
    //(0x1B130, 0x1B16F),
    // Nushu
    //(0x1B170, 0x1B2FF),
    // Duployan
    //(0x1BC00, 0x1BC9F),
    // Shorthand Format Controls
    //(0x1BCA0, 0x1BCAF),
    // Znamenny Musical Notation
    //(0x1CF00, 0x1CFCF),
    // Byzantine Musical Symbols
    //(0x1D000, 0x1D0FF),
    // Musical Symbols
    (0x1D100, 0x1D1FF),
    // Ancient Greek Musical Notation
    //(0x1D200, 0x1D24F),
    // Kaktovik Numerals
    //(0x1D2C0, 0x1D2DF),
    // Mayan Numerals
    //(0x1D2E0, 0x1D2FF),
    // Tai Xuan Jing Symbols
    //(0x1D300, 0x1D35F),
    // Counting Rod Numerals
    //(0x1D360, 0x1D37F),
    // Mathematical Alphanumeric Symbols
    //(0x1D400, 0x1D7FF),
    // Sutton SignWriting
    //(0x1D800, 0x1DAAF),
    // Latin Extended-G
    //(0x1DF00, 0x1DFFF),
    // Glagolitic Supplement
    //(0x1E000, 0x1E02F),
    // Cyrillic Extended-D
    //(0x1E030, 0x1E08F),
    // Nyiakeng Puachue Hmong
    //(0x1E100, 0x1E14F),
    // Toto
    //(0x1E290, 0x1E2BF),
    // Wancho
    //(0x1E2C0, 0x1E2FF),
    // Nag Mundari
    //(0x1E4D0, 0x1E4FF),
    // Ethiopic Extended-B
    //(0x1E7E0, 0x1E7FF),
    // Mende Kikakui
    //(0x1E800, 0x1E8DF),
    // Adlam
    //(0x1E900, 0x1E95F),
    // Indic Siyaq Numbers
    //(0x1EC70, 0x1ECBF),
    // Ottoman Siyaq Numbers
    //(0x1ED00, 0x1ED4F),
    // Arabic Mathematical Alphabetic Symbols
    //(0x1EE00, 0x1EEFF),
    // Mahjong Tiles
    //(0x1F000, 0x1F02F),
    // Domino Tiles
    //(0x1F030, 0x1F09F),
    // Playing Cards
    //(0x1F0A0, 0x1F0FF),
    // Enclosed Alphanumeric Supplement
    //(0x1F100, 0x1F1FF),
    // Enclosed Ideographic Supplement
    //(0x1F200, 0x1F2FF),
    // Miscellaneous Symbols and Pictographs
    (0x1F300, 0x1F5FF),
    // Emoticons
    //(0x1F600, 0x1F64F),
    // Ornamental Dingbats
    //(0x1F650, 0x1F67F),
    // Transport and Map Symbols
    //(0x1F680, 0x1F6FF),
    // Alchemical Symbols
    //(0x1F700, 0x1F77F),
    // Geometric Shapes Extended
    //(0x1F780, 0x1F7FF),
    // Supplemental Arrows-C
    //(0x1F800, 0x1F8FF),
    // Supplemental Symbols and Pictographs
    (0x1F900, 0x1F9FF),
    // Chess Symbols
    //(0x1FA00, 0x1FA6F),
    // Symbols and Pictographs Extended-A
    //(0x1FA70, 0x1FAFF),
    // Symbols for Legacy Computing
    (0x1FB00, 0x1FBFF),
    // CJK Unified Ideographs Extension B
    //(0x20000, 0x2A6DF),
    // CJK Unified Ideographs Extension C
    //(0x2A700, 0x2B73F),
    // CJK Unified Ideographs Extension D
    //(0x2B740, 0x2B81F),
    // CJK Unified Ideographs Extension E
    //(0x2B820, 0x2CEAF),
    // CJK Unified Ideographs Extension F
    //(0x2CEB0, 0x2EBEF),
    // CJK Unified Ideographs Extension I
    //(0x2EBF0, 0x2EE5F),
    // CJK Compatibility Ideographs Supplement
    //(0x2F800, 0x2FA1F),
    // CJK Unified Ideographs Extension G
    //(0x30000, 0x3134F),
    // CJK Unified Ideographs Extension H
    //(0x31350, 0x323AF),
    // Tags
    //(0xE0000, 0xE007F),
    // Variation Selectors Supplement
    //(0xE0100, 0xE01EF),
    // Supplementary Private Use Area-A
    //(0xF0000, 0xFFFFF),
    // Supplementary Private Use Area-B
    //(0x100000, 0x10FFFF),
 ];
--- a/src/main.rs
+++ b/src/main.rs
@ -1,21 +1,15 @@
-use std::io::Write;
+mod blocks;
-fn main() -> std::io::Result<()> {
+fn main() {
-    let file = std::fs::OpenOptions::new()
+    for block in blocks::BLOCKS {
-        .write(true)
+        // Each block is stored as inclusive (first, last) bounds, so the range must
+        // include `block.1`; an exclusive `..` drops the last code point of every block
+        // (e.g. U+00FF at the end of Latin-1 Supplement).
+        for i in (block.0)..=(block.1) {
        .create(true)
        .open("all_chars.txt")?;
    let mut writer = std::io::BufWriter::new(file);
    for i in 0x0..(0x10FFFF + 1) {
            match char::from_u32(i) {
                Some(ch) => {
                    if ch.is_control() {
                        continue;
                    }
                    if let Some(name) = charname::get_name_checked(i) {
-                    write!(writer, "U+{:04X}\t{}\t{}\n", i, ch, name)?;
+                        println!("U+{:04X}\t{}\t{}", i, ch, name);
                    }
                }
                None => {
@ -23,6 +17,5 @@ fn main() -> std::io::Result<()> {
                }
            };
        }
-
+    }
    Ok(())
 }
		`@ -1,3 +0,0 @@`
			`perl -ne 'if (/Private Use/i) {next;} else {print;}' all_chars.txt > filtered.txt`

			`perl -F'\t' -ae 'chomp @F; print("$F[0] $F[1] ", ($F[2] =~ s/\b(.)(.*?)\b/\U$1\L$2/gr), "\n")' filtered.txt > formatted.txt`