made much better
This commit is contained in:
parent
206d4840ba
commit
39374944b7
5 changed files with 681 additions and 29 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,2 +1,3 @@
|
||||||
/target
|
/target
|
||||||
*.txt
|
*.txt
|
||||||
|
*.sh
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
Generate a file containing every single UTF8 codepoint, its glyph, and its name.
|
Prints unicode characters to stdout.
|
||||||
|
|
||||||
First do `cargo run`, then `sh filter_and_format.sh`. Your final file will be
|
Specify which unicode blocks you want by uncommenting them in `src/blocks.rs`.
|
||||||
called `formatted.txt`. Do what you want with it.
|
Do `cargo run` and the codepoint, glyph, and name of every character in the
|
||||||
|
specified blocks will be printed to stdout, separated by a tab `\t`.
|
||||||
|
|
||||||
|
That's it. Enjoy
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
perl -ne 'if (/Private Use/i) {next;} else {print;}' all_chars.txt > filtered.txt
|
|
||||||
|
|
||||||
perl -F'\t' -ae 'chomp @F; print("$F[0] $F[1] ", ($F[2] =~ s/\b(.)(.*?)\b/\U$1\L$2/gr), "\n")' filtered.txt > formatted.txt
|
|
658
src/blocks.rs
Normal file
658
src/blocks.rs
Normal file
|
@ -0,0 +1,658 @@
|
||||||
|
pub const BLOCKS: &[(u32, u32)] = &[
|
||||||
|
// Basic Latin
|
||||||
|
(0x0000, 0x007F),
|
||||||
|
// Latin-1 Supplement
|
||||||
|
(0x0080, 0x00FF),
|
||||||
|
// Latin Extended-A
|
||||||
|
(0x0100, 0x017F),
|
||||||
|
// Latin Extended-B
|
||||||
|
(0x0180, 0x024F),
|
||||||
|
// IPA Extensions
|
||||||
|
(0x0250, 0x02AF),
|
||||||
|
// Spacing Modifier Letters
|
||||||
|
//(0x02B0, 0x02FF),
|
||||||
|
// Combining Diacritical Marks
|
||||||
|
//(0x0300, 0x036F),
|
||||||
|
// Greek and Coptic
|
||||||
|
(0x0370, 0x03FF),
|
||||||
|
// Cyrillic
|
||||||
|
(0x0400, 0x04FF),
|
||||||
|
// Cyrillic Supplement
|
||||||
|
(0x0500, 0x052F),
|
||||||
|
// Armenian
|
||||||
|
//(0x0530, 0x058F),
|
||||||
|
// Hebrew
|
||||||
|
//(0x0590, 0x05FF),
|
||||||
|
// Arabic
|
||||||
|
//(0x0600, 0x06FF),
|
||||||
|
// Syriac
|
||||||
|
//(0x0700, 0x074F),
|
||||||
|
// Arabic Supplement
|
||||||
|
//(0x0750, 0x077F),
|
||||||
|
// Thaana
|
||||||
|
//(0x0780, 0x07BF),
|
||||||
|
// NKo
|
||||||
|
//(0x07C0, 0x07FF),
|
||||||
|
// Samaritan
|
||||||
|
//(0x0800, 0x083F),
|
||||||
|
// Mandaic
|
||||||
|
//(0x0840, 0x085F),
|
||||||
|
// Syriac Supplement
|
||||||
|
//(0x0860, 0x086F),
|
||||||
|
// Arabic Extended-B
|
||||||
|
//(0x0870, 0x089F),
|
||||||
|
// Arabic Extended-A
|
||||||
|
//(0x08A0, 0x08FF),
|
||||||
|
// Devanagari
|
||||||
|
//(0x0900, 0x097F),
|
||||||
|
// Bengali
|
||||||
|
//(0x0980, 0x09FF),
|
||||||
|
// Gurmukhi
|
||||||
|
//(0x0A00, 0x0A7F),
|
||||||
|
// Gujarati
|
||||||
|
//(0x0A80, 0x0AFF),
|
||||||
|
// Oriya
|
||||||
|
//(0x0B00, 0x0B7F),
|
||||||
|
// Tamil
|
||||||
|
//(0x0B80, 0x0BFF),
|
||||||
|
// Telugu
|
||||||
|
//(0x0C00, 0x0C7F),
|
||||||
|
// Kannada
|
||||||
|
//(0x0C80, 0x0CFF),
|
||||||
|
// Malayalam
|
||||||
|
//(0x0D00, 0x0D7F),
|
||||||
|
// Sinhala
|
||||||
|
//(0x0D80, 0x0DFF),
|
||||||
|
// Thai
|
||||||
|
//(0x0E00, 0x0E7F),
|
||||||
|
// Lao
|
||||||
|
//(0x0E80, 0x0EFF),
|
||||||
|
// Tibetan
|
||||||
|
//(0x0F00, 0x0FFF),
|
||||||
|
// Myanmar
|
||||||
|
//(0x1000, 0x109F),
|
||||||
|
// Georgian
|
||||||
|
//(0x10A0, 0x10FF),
|
||||||
|
// Hangul Jamo
|
||||||
|
//(0x1100, 0x11FF),
|
||||||
|
// Ethiopic
|
||||||
|
//(0x1200, 0x137F),
|
||||||
|
// Ethiopic Supplement
|
||||||
|
//(0x1380, 0x139F),
|
||||||
|
// Cherokee
|
||||||
|
//(0x13A0, 0x13FF),
|
||||||
|
// Unified Canadian Aboriginal Syllabics
|
||||||
|
//(0x1400, 0x167F),
|
||||||
|
// Ogham
|
||||||
|
//(0x1680, 0x169F),
|
||||||
|
// Runic
|
||||||
|
//(0x16A0, 0x16FF),
|
||||||
|
// Tagalog
|
||||||
|
//(0x1700, 0x171F),
|
||||||
|
// Hanunoo
|
||||||
|
//(0x1720, 0x173F),
|
||||||
|
// Buhid
|
||||||
|
//(0x1740, 0x175F),
|
||||||
|
// Tagbanwa
|
||||||
|
//(0x1760, 0x177F),
|
||||||
|
// Khmer
|
||||||
|
//(0x1780, 0x17FF),
|
||||||
|
// Mongolian
|
||||||
|
//(0x1800, 0x18AF),
|
||||||
|
// Unified Canadian Aboriginal Syllabics Extended
|
||||||
|
//(0x18B0, 0x18FF),
|
||||||
|
// Limbu
|
||||||
|
//(0x1900, 0x194F),
|
||||||
|
// Tai Le
|
||||||
|
//(0x1950, 0x197F),
|
||||||
|
// New Tai Lue
|
||||||
|
//(0x1980, 0x19DF),
|
||||||
|
// Khmer Symbols
|
||||||
|
//(0x19E0, 0x19FF),
|
||||||
|
// Buginese
|
||||||
|
//(0x1A00, 0x1A1F),
|
||||||
|
// Tai Tham
|
||||||
|
//(0x1A20, 0x1AAF),
|
||||||
|
// Combining Diacritical Marks Extended
|
||||||
|
//(0x1AB0, 0x1AFF),
|
||||||
|
// Balinese
|
||||||
|
//(0x1B00, 0x1B7F),
|
||||||
|
// Sundanese
|
||||||
|
//(0x1B80, 0x1BBF),
|
||||||
|
// Batak
|
||||||
|
//(0x1BC0, 0x1BFF),
|
||||||
|
// Lepcha
|
||||||
|
//(0x1C00, 0x1C4F),
|
||||||
|
// Ol Chiki
|
||||||
|
//(0x1C50, 0x1C7F),
|
||||||
|
// Cyrillic Extended-C
|
||||||
|
//(0x1C80, 0x1C8F),
|
||||||
|
// Georgian Extended
|
||||||
|
//(0x1C90, 0x1CBF),
|
||||||
|
// Sundanese Supplement
|
||||||
|
//(0x1CC0, 0x1CCF),
|
||||||
|
// Vedic Extensions
|
||||||
|
//(0x1CD0, 0x1CFF),
|
||||||
|
// Phonetic Extensions
|
||||||
|
//(0x1D00, 0x1D7F),
|
||||||
|
// Phonetic Extensions Supplement
|
||||||
|
//(0x1D80, 0x1DBF),
|
||||||
|
// Combining Diacritical Marks Supplement
|
||||||
|
//(0x1DC0, 0x1DFF),
|
||||||
|
// Latin Extended Additional
|
||||||
|
(0x1E00, 0x1EFF),
|
||||||
|
// Greek Extended
|
||||||
|
(0x1F00, 0x1FFF),
|
||||||
|
// General Punctuation
|
||||||
|
(0x2000, 0x206F),
|
||||||
|
// Superscripts and Subscripts
|
||||||
|
(0x2070, 0x209F),
|
||||||
|
// Currency Symbols
|
||||||
|
(0x20A0, 0x20CF),
|
||||||
|
// Combining Diacritical Marks for Symbols
|
||||||
|
(0x20D0, 0x20FF),
|
||||||
|
// Letterlike Symbols
|
||||||
|
(0x2100, 0x214F),
|
||||||
|
// Number Forms
|
||||||
|
//(0x2150, 0x218F),
|
||||||
|
// Arrows
|
||||||
|
(0x2190, 0x21FF),
|
||||||
|
// Mathematical Operators
|
||||||
|
(0x2200, 0x22FF),
|
||||||
|
// Miscellaneous Technical
|
||||||
|
(0x2300, 0x23FF),
|
||||||
|
// Control Pictures
|
||||||
|
(0x2400, 0x243F),
|
||||||
|
// Optical Character Recognition
|
||||||
|
//(0x2440, 0x245F),
|
||||||
|
// Enclosed Alphanumerics
|
||||||
|
//(0x2460, 0x24FF),
|
||||||
|
// Box Drawing
|
||||||
|
(0x2500, 0x257F),
|
||||||
|
// Block Elements
|
||||||
|
(0x2580, 0x259F),
|
||||||
|
// Geometric Shapes
|
||||||
|
(0x25A0, 0x25FF),
|
||||||
|
// Miscellaneous Symbols
|
||||||
|
(0x2600, 0x26FF),
|
||||||
|
// Dingbats
|
||||||
|
(0x2700, 0x27BF),
|
||||||
|
// Miscellaneous Mathematical Symbols-A
|
||||||
|
(0x27C0, 0x27EF),
|
||||||
|
// Supplemental Arrows-A
|
||||||
|
(0x27F0, 0x27FF),
|
||||||
|
// Braille Patterns
|
||||||
|
//(0x2800, 0x28FF),
|
||||||
|
// Supplemental Arrows-B
|
||||||
|
(0x2900, 0x297F),
|
||||||
|
// Miscellaneous Mathematical Symbols-B
|
||||||
|
(0x2980, 0x29FF),
|
||||||
|
// Supplemental Mathematical Operators
|
||||||
|
(0x2A00, 0x2AFF),
|
||||||
|
// Miscellaneous Symbols and Arrows
|
||||||
|
(0x2B00, 0x2BFF),
|
||||||
|
// Glagolitic
|
||||||
|
//(0x2C00, 0x2C5F),
|
||||||
|
// Latin Extended-C
|
||||||
|
(0x2C60, 0x2C7F),
|
||||||
|
// Coptic
|
||||||
|
//(0x2C80, 0x2CFF),
|
||||||
|
// Georgian Supplement
|
||||||
|
//(0x2D00, 0x2D2F),
|
||||||
|
// Tifinagh
|
||||||
|
//(0x2D30, 0x2D7F),
|
||||||
|
// Ethiopic Extended
|
||||||
|
//(0x2D80, 0x2DDF),
|
||||||
|
// Cyrillic Extended-A
|
||||||
|
(0x2DE0, 0x2DFF),
|
||||||
|
// Supplemental Punctuation
|
||||||
|
//(0x2E00, 0x2E7F),
|
||||||
|
// CJK Radicals Supplement
|
||||||
|
//(0x2E80, 0x2EFF),
|
||||||
|
// Kangxi Radicals
|
||||||
|
//(0x2F00, 0x2FDF),
|
||||||
|
// Ideographic Description Characters
|
||||||
|
//(0x2FF0, 0x2FFF),
|
||||||
|
// CJK Symbols and Punctuation
|
||||||
|
//(0x3000, 0x303F),
|
||||||
|
// Hiragana
|
||||||
|
//(0x3040, 0x309F),
|
||||||
|
// Katakana
|
||||||
|
//(0x30A0, 0x30FF),
|
||||||
|
// Bopomofo
|
||||||
|
//(0x3100, 0x312F),
|
||||||
|
// Hangul Compatibility Jamo
|
||||||
|
//(0x3130, 0x318F),
|
||||||
|
// Kanbun
|
||||||
|
//(0x3190, 0x319F),
|
||||||
|
// Bopomofo Extended
|
||||||
|
//(0x31A0, 0x31BF),
|
||||||
|
// CJK Strokes
|
||||||
|
//(0x31C0, 0x31EF),
|
||||||
|
// Katakana Phonetic Extensions
|
||||||
|
//(0x31F0, 0x31FF),
|
||||||
|
// Enclosed CJK Letters and Months
|
||||||
|
//(0x3200, 0x32FF),
|
||||||
|
// CJK Compatibility
|
||||||
|
//(0x3300, 0x33FF),
|
||||||
|
// CJK Unified Ideographs Extension A
|
||||||
|
//(0x3400, 0x4DBF),
|
||||||
|
// Yijing Hexagram Symbols
|
||||||
|
//(0x4DC0, 0x4DFF),
|
||||||
|
// CJK Unified Ideographs
|
||||||
|
//(0x4E00, 0x9FFF),
|
||||||
|
// Yi Syllables
|
||||||
|
//(0xA000, 0xA48F),
|
||||||
|
// Yi Radicals
|
||||||
|
//(0xA490, 0xA4CF),
|
||||||
|
// Lisu
|
||||||
|
//(0xA4D0, 0xA4FF),
|
||||||
|
// Vai
|
||||||
|
//(0xA500, 0xA63F),
|
||||||
|
// Cyrillic Extended-B
|
||||||
|
//(0xA640, 0xA69F),
|
||||||
|
// Bamum
|
||||||
|
//(0xA6A0, 0xA6FF),
|
||||||
|
// Modifier Tone Letters
|
||||||
|
//(0xA700, 0xA71F),
|
||||||
|
// Latin Extended-D
|
||||||
|
(0xA720, 0xA7FF),
|
||||||
|
// Syloti Nagri
|
||||||
|
//(0xA800, 0xA82F),
|
||||||
|
// Common Indic Number Forms
|
||||||
|
//(0xA830, 0xA83F),
|
||||||
|
// Phags-pa
|
||||||
|
//(0xA840, 0xA87F),
|
||||||
|
// Saurashtra
|
||||||
|
//(0xA880, 0xA8DF),
|
||||||
|
// Devanagari Extended
|
||||||
|
//(0xA8E0, 0xA8FF),
|
||||||
|
// Kayah Li
|
||||||
|
//(0xA900, 0xA92F),
|
||||||
|
// Rejang
|
||||||
|
//(0xA930, 0xA95F),
|
||||||
|
// Hangul Jamo Extended-A
|
||||||
|
//(0xA960, 0xA97F),
|
||||||
|
// Javanese
|
||||||
|
//(0xA980, 0xA9DF),
|
||||||
|
// Myanmar Extended-B
|
||||||
|
//(0xA9E0, 0xA9FF),
|
||||||
|
// Cham
|
||||||
|
//(0xAA00, 0xAA5F),
|
||||||
|
// Myanmar Extended-A
|
||||||
|
//(0xAA60, 0xAA7F),
|
||||||
|
// Tai Viet
|
||||||
|
//(0xAA80, 0xAADF),
|
||||||
|
// Meetei Mayek Extensions
|
||||||
|
//(0xAAE0, 0xAAFF),
|
||||||
|
// Ethiopic Extended-A
|
||||||
|
//(0xAB00, 0xAB2F),
|
||||||
|
// Latin Extended-E
|
||||||
|
//(0xAB30, 0xAB6F),
|
||||||
|
// Cherokee Supplement
|
||||||
|
//(0xAB70, 0xABBF),
|
||||||
|
// Meetei Mayek
|
||||||
|
//(0xABC0, 0xABFF),
|
||||||
|
// Hangul Syllables
|
||||||
|
//(0xAC00, 0xD7AF),
|
||||||
|
// Hangul Jamo Extended-B
|
||||||
|
//(0xD7B0, 0xD7FF),
|
||||||
|
// High Surrogates
|
||||||
|
//(0xD800, 0xDB7F),
|
||||||
|
// High Private Use Surrogates
|
||||||
|
//(0xDB80, 0xDBFF),
|
||||||
|
// Low Surrogates
|
||||||
|
//(0xDC00, 0xDFFF),
|
||||||
|
// Private Use Area
|
||||||
|
//(0xE000, 0xF8FF),
|
||||||
|
// CJK Compatibility Ideographs
|
||||||
|
//(0xF900, 0xFAFF),
|
||||||
|
// Alphabetic Presentation Forms
|
||||||
|
//(0xFB00, 0xFB4F),
|
||||||
|
// Arabic Presentation Forms-A
|
||||||
|
//(0xFB50, 0xFDFF),
|
||||||
|
// Variation Selectors
|
||||||
|
//(0xFE00, 0xFE0F),
|
||||||
|
// Vertical Forms
|
||||||
|
//(0xFE10, 0xFE1F),
|
||||||
|
// Combining Half Marks
|
||||||
|
//(0xFE20, 0xFE2F),
|
||||||
|
// CJK Compatibility Forms
|
||||||
|
//(0xFE30, 0xFE4F),
|
||||||
|
// Small Form Variants
|
||||||
|
//(0xFE50, 0xFE6F),
|
||||||
|
// Arabic Presentation Forms-B
|
||||||
|
//(0xFE70, 0xFEFF),
|
||||||
|
// Halfwidth and Fullwidth Forms
|
||||||
|
//(0xFF00, 0xFFEF),
|
||||||
|
// Specials
|
||||||
|
//(0xFFF0, 0xFFFF),
|
||||||
|
// Linear B Syllabary
|
||||||
|
//(0x10000, 0x1007F),
|
||||||
|
// Linear B Ideograms
|
||||||
|
//(0x10080, 0x100FF),
|
||||||
|
// Aegean Numbers
|
||||||
|
//(0x10100, 0x1013F),
|
||||||
|
// Ancient Greek Numbers
|
||||||
|
//(0x10140, 0x1018F),
|
||||||
|
// Ancient Symbols
|
||||||
|
//(0x10190, 0x101CF),
|
||||||
|
// Phaistos Disc
|
||||||
|
//(0x101D0, 0x101FF),
|
||||||
|
// Lycian
|
||||||
|
//(0x10280, 0x1029F),
|
||||||
|
// Carian
|
||||||
|
//(0x102A0, 0x102DF),
|
||||||
|
// Coptic Epact Numbers
|
||||||
|
//(0x102E0, 0x102FF),
|
||||||
|
// Old Italic
|
||||||
|
//(0x10300, 0x1032F),
|
||||||
|
// Gothic
|
||||||
|
//(0x10330, 0x1034F),
|
||||||
|
// Old Permic
|
||||||
|
//(0x10350, 0x1037F),
|
||||||
|
// Ugaritic
|
||||||
|
//(0x10380, 0x1039F),
|
||||||
|
// Old Persian
|
||||||
|
//(0x103A0, 0x103DF),
|
||||||
|
// Deseret
|
||||||
|
//(0x10400, 0x1044F),
|
||||||
|
// Shavian
|
||||||
|
//(0x10450, 0x1047F),
|
||||||
|
// Osmanya
|
||||||
|
//(0x10480, 0x104AF),
|
||||||
|
// Osage
|
||||||
|
//(0x104B0, 0x104FF),
|
||||||
|
// Elbasan
|
||||||
|
//(0x10500, 0x1052F),
|
||||||
|
// Caucasian Albanian
|
||||||
|
//(0x10530, 0x1056F),
|
||||||
|
// Vithkuqi
|
||||||
|
//(0x10570, 0x105BF),
|
||||||
|
// Linear A
|
||||||
|
//(0x10600, 0x1077F),
|
||||||
|
// Latin Extended-F
|
||||||
|
//(0x10780, 0x107BF),
|
||||||
|
// Cypriot Syllabary
|
||||||
|
//(0x10800, 0x1083F),
|
||||||
|
// Imperial Aramaic
|
||||||
|
//(0x10840, 0x1085F),
|
||||||
|
// Palmyrene
|
||||||
|
//(0x10860, 0x1087F),
|
||||||
|
// Nabataean
|
||||||
|
//(0x10880, 0x108AF),
|
||||||
|
// Hatran
|
||||||
|
//(0x108E0, 0x108FF),
|
||||||
|
// Phoenician
|
||||||
|
//(0x10900, 0x1091F),
|
||||||
|
// Lydian
|
||||||
|
//(0x10920, 0x1093F),
|
||||||
|
// Meroitic Hieroglyphs
|
||||||
|
//(0x10980, 0x1099F),
|
||||||
|
// Meroitic Cursive
|
||||||
|
//(0x109A0, 0x109FF),
|
||||||
|
// Kharoshthi
|
||||||
|
//(0x10A00, 0x10A5F),
|
||||||
|
// Old South Arabian
|
||||||
|
//(0x10A60, 0x10A7F),
|
||||||
|
// Old North Arabian
|
||||||
|
//(0x10A80, 0x10A9F),
|
||||||
|
// Manichaean
|
||||||
|
//(0x10AC0, 0x10AFF),
|
||||||
|
// Avestan
|
||||||
|
//(0x10B00, 0x10B3F),
|
||||||
|
// Inscriptional Parthian
|
||||||
|
//(0x10B40, 0x10B5F),
|
||||||
|
// Inscriptional Pahlavi
|
||||||
|
//(0x10B60, 0x10B7F),
|
||||||
|
// Psalter Pahlavi
|
||||||
|
//(0x10B80, 0x10BAF),
|
||||||
|
// Old Turkic
|
||||||
|
//(0x10C00, 0x10C4F),
|
||||||
|
// Old Hungarian
|
||||||
|
//(0x10C80, 0x10CFF),
|
||||||
|
// Hanifi Rohingya
|
||||||
|
//(0x10D00, 0x10D3F),
|
||||||
|
// Rumi Numeral Symbols
|
||||||
|
//(0x10E60, 0x10E7F),
|
||||||
|
// Yezidi
|
||||||
|
//(0x10E80, 0x10EBF),
|
||||||
|
// Arabic Extended-C
|
||||||
|
//(0x10EC0, 0x10EFF),
|
||||||
|
// Old Sogdian
|
||||||
|
//(0x10F00, 0x10F2F),
|
||||||
|
// Sogdian
|
||||||
|
//(0x10F30, 0x10F6F),
|
||||||
|
// Old Uyghur
|
||||||
|
//(0x10F70, 0x10FAF),
|
||||||
|
// Chorasmian
|
||||||
|
//(0x10FB0, 0x10FDF),
|
||||||
|
// Elymaic
|
||||||
|
//(0x10FE0, 0x10FFF),
|
||||||
|
// Brahmi
|
||||||
|
//(0x11000, 0x1107F),
|
||||||
|
// Kaithi
|
||||||
|
//(0x11080, 0x110CF),
|
||||||
|
// Sora Sompeng
|
||||||
|
//(0x110D0, 0x110FF),
|
||||||
|
// Chakma
|
||||||
|
//(0x11100, 0x1114F),
|
||||||
|
// Mahajani
|
||||||
|
//(0x11150, 0x1117F),
|
||||||
|
// Sharada
|
||||||
|
//(0x11180, 0x111DF),
|
||||||
|
// Sinhala Archaic Numbers
|
||||||
|
//(0x111E0, 0x111FF),
|
||||||
|
// Khojki
|
||||||
|
//(0x11200, 0x1124F),
|
||||||
|
// Multani
|
||||||
|
//(0x11280, 0x112AF),
|
||||||
|
// Khudawadi
|
||||||
|
//(0x112B0, 0x112FF),
|
||||||
|
// Grantha
|
||||||
|
//(0x11300, 0x1137F),
|
||||||
|
// Newa
|
||||||
|
//(0x11400, 0x1147F),
|
||||||
|
// Tirhuta
|
||||||
|
//(0x11480, 0x114DF),
|
||||||
|
// Siddham
|
||||||
|
//(0x11580, 0x115FF),
|
||||||
|
// Modi
|
||||||
|
//(0x11600, 0x1165F),
|
||||||
|
// Mongolian Supplement
|
||||||
|
//(0x11660, 0x1167F),
|
||||||
|
// Takri
|
||||||
|
//(0x11680, 0x116CF),
|
||||||
|
// Ahom
|
||||||
|
//(0x11700, 0x1174F),
|
||||||
|
// Dogra
|
||||||
|
//(0x11800, 0x1184F),
|
||||||
|
// Warang Citi
|
||||||
|
//(0x118A0, 0x118FF),
|
||||||
|
// Dives Akuru
|
||||||
|
//(0x11900, 0x1195F),
|
||||||
|
// Nandinagari
|
||||||
|
//(0x119A0, 0x119FF),
|
||||||
|
// Zanabazar Square
|
||||||
|
//(0x11A00, 0x11A4F),
|
||||||
|
// Soyombo
|
||||||
|
//(0x11A50, 0x11AAF),
|
||||||
|
// Unified Canadian Aboriginal Syllabics Extended-A
|
||||||
|
//(0x11AB0, 0x11ABF),
|
||||||
|
// Pau Cin Hau
|
||||||
|
//(0x11AC0, 0x11AFF),
|
||||||
|
// Devanagari Extended-A
|
||||||
|
//(0x11B00, 0x11B5F),
|
||||||
|
// Bhaiksuki
|
||||||
|
//(0x11C00, 0x11C6F),
|
||||||
|
// Marchen
|
||||||
|
//(0x11C70, 0x11CBF),
|
||||||
|
// Masaram Gondi
|
||||||
|
//(0x11D00, 0x11D5F),
|
||||||
|
// Gunjala Gondi
|
||||||
|
//(0x11D60, 0x11DAF),
|
||||||
|
// Makasar
|
||||||
|
//(0x11EE0, 0x11EFF),
|
||||||
|
// Kawi
|
||||||
|
//(0x11F00, 0x11F5F),
|
||||||
|
// Lisu Supplement
|
||||||
|
//(0x11FB0, 0x11FBF),
|
||||||
|
// Tamil Supplement
|
||||||
|
//(0x11FC0, 0x11FFF),
|
||||||
|
// Cuneiform
|
||||||
|
//(0x12000, 0x123FF),
|
||||||
|
// Cuneiform Numbers and Punctuation
|
||||||
|
//(0x12400, 0x1247F),
|
||||||
|
// Early Dynastic Cuneiform
|
||||||
|
//(0x12480, 0x1254F),
|
||||||
|
// Cypro-Minoan
|
||||||
|
//(0x12F90, 0x12FFF),
|
||||||
|
// Egyptian Hieroglyphs
|
||||||
|
//(0x13000, 0x1342F),
|
||||||
|
// Egyptian Hieroglyph Format Controls
|
||||||
|
//(0x13430, 0x1345F),
|
||||||
|
// Anatolian Hieroglyphs
|
||||||
|
//(0x14400, 0x1467F),
|
||||||
|
// Bamum Supplement
|
||||||
|
//(0x16800, 0x16A3F),
|
||||||
|
// Mro
|
||||||
|
//(0x16A40, 0x16A6F),
|
||||||
|
// Tangsa
|
||||||
|
//(0x16A70, 0x16ACF),
|
||||||
|
// Bassa Vah
|
||||||
|
//(0x16AD0, 0x16AFF),
|
||||||
|
// Pahawh Hmong
|
||||||
|
//(0x16B00, 0x16B8F),
|
||||||
|
// Medefaidrin
|
||||||
|
//(0x16E40, 0x16E9F),
|
||||||
|
// Miao
|
||||||
|
//(0x16F00, 0x16F9F),
|
||||||
|
// Ideographic Symbols and Punctuation
|
||||||
|
//(0x16FE0, 0x16FFF),
|
||||||
|
// Tangut
|
||||||
|
//(0x17000, 0x187FF),
|
||||||
|
// Tangut Components
|
||||||
|
//(0x18800, 0x18AFF),
|
||||||
|
// Khitan Small Script
|
||||||
|
//(0x18B00, 0x18CFF),
|
||||||
|
// Tangut Supplement
|
||||||
|
//(0x18D00, 0x18D7F),
|
||||||
|
// Kana Extended-B
|
||||||
|
//(0x1AFF0, 0x1AFFF),
|
||||||
|
// Kana Supplement
|
||||||
|
//(0x1B000, 0x1B0FF),
|
||||||
|
// Kana Extended-A
|
||||||
|
//(0x1B100, 0x1B12F),
|
||||||
|
// Small Kana Extension
|
||||||
|
//(0x1B130, 0x1B16F),
|
||||||
|
// Nushu
|
||||||
|
//(0x1B170, 0x1B2FF),
|
||||||
|
// Duployan
|
||||||
|
//(0x1BC00, 0x1BC9F),
|
||||||
|
// Shorthand Format Controls
|
||||||
|
//(0x1BCA0, 0x1BCAF),
|
||||||
|
// Znamenny Musical Notation
|
||||||
|
//(0x1CF00, 0x1CFCF),
|
||||||
|
// Byzantine Musical Symbols
|
||||||
|
//(0x1D000, 0x1D0FF),
|
||||||
|
// Musical Symbols
|
||||||
|
(0x1D100, 0x1D1FF),
|
||||||
|
// Ancient Greek Musical Notation
|
||||||
|
//(0x1D200, 0x1D24F),
|
||||||
|
// Kaktovik Numerals
|
||||||
|
//(0x1D2C0, 0x1D2DF),
|
||||||
|
// Mayan Numerals
|
||||||
|
//(0x1D2E0, 0x1D2FF),
|
||||||
|
// Tai Xuan Jing Symbols
|
||||||
|
//(0x1D300, 0x1D35F),
|
||||||
|
// Counting Rod Numerals
|
||||||
|
//(0x1D360, 0x1D37F),
|
||||||
|
// Mathematical Alphanumeric Symbols
|
||||||
|
//(0x1D400, 0x1D7FF),
|
||||||
|
// Sutton SignWriting
|
||||||
|
//(0x1D800, 0x1DAAF),
|
||||||
|
// Latin Extended-G
|
||||||
|
//(0x1DF00, 0x1DFFF),
|
||||||
|
// Glagolitic Supplement
|
||||||
|
//(0x1E000, 0x1E02F),
|
||||||
|
// Cyrillic Extended-D
|
||||||
|
//(0x1E030, 0x1E08F),
|
||||||
|
// Nyiakeng Puachue Hmong
|
||||||
|
//(0x1E100, 0x1E14F),
|
||||||
|
// Toto
|
||||||
|
//(0x1E290, 0x1E2BF),
|
||||||
|
// Wancho
|
||||||
|
//(0x1E2C0, 0x1E2FF),
|
||||||
|
// Nag Mundari
|
||||||
|
//(0x1E4D0, 0x1E4FF),
|
||||||
|
// Ethiopic Extended-B
|
||||||
|
//(0x1E7E0, 0x1E7FF),
|
||||||
|
// Mende Kikakui
|
||||||
|
//(0x1E800, 0x1E8DF),
|
||||||
|
// Adlam
|
||||||
|
//(0x1E900, 0x1E95F),
|
||||||
|
// Indic Siyaq Numbers
|
||||||
|
//(0x1EC70, 0x1ECBF),
|
||||||
|
// Ottoman Siyaq Numbers
|
||||||
|
//(0x1ED00, 0x1ED4F),
|
||||||
|
// Arabic Mathematical Alphabetic Symbols
|
||||||
|
//(0x1EE00, 0x1EEFF),
|
||||||
|
// Mahjong Tiles
|
||||||
|
//(0x1F000, 0x1F02F),
|
||||||
|
// Domino Tiles
|
||||||
|
//(0x1F030, 0x1F09F),
|
||||||
|
// Playing Cards
|
||||||
|
//(0x1F0A0, 0x1F0FF),
|
||||||
|
// Enclosed Alphanumeric Supplement
|
||||||
|
//(0x1F100, 0x1F1FF),
|
||||||
|
// Enclosed Ideographic Supplement
|
||||||
|
//(0x1F200, 0x1F2FF),
|
||||||
|
// Miscellaneous Symbols and Pictographs
|
||||||
|
(0x1F300, 0x1F5FF),
|
||||||
|
// Emoticons
|
||||||
|
//(0x1F600, 0x1F64F),
|
||||||
|
// Ornamental Dingbats
|
||||||
|
//(0x1F650, 0x1F67F),
|
||||||
|
// Transport and Map Symbols
|
||||||
|
//(0x1F680, 0x1F6FF),
|
||||||
|
// Alchemical Symbols
|
||||||
|
//(0x1F700, 0x1F77F),
|
||||||
|
// Geometric Shapes Extended
|
||||||
|
//(0x1F780, 0x1F7FF),
|
||||||
|
// Supplemental Arrows-C
|
||||||
|
//(0x1F800, 0x1F8FF),
|
||||||
|
// Supplemental Symbols and Pictographs
|
||||||
|
(0x1F900, 0x1F9FF),
|
||||||
|
// Chess Symbols
|
||||||
|
//(0x1FA00, 0x1FA6F),
|
||||||
|
// Symbols and Pictographs Extended-A
|
||||||
|
//(0x1FA70, 0x1FAFF),
|
||||||
|
// Symbols for Legacy Computing
|
||||||
|
(0x1FB00, 0x1FBFF),
|
||||||
|
// CJK Unified Ideographs Extension B
|
||||||
|
//(0x20000, 0x2A6DF),
|
||||||
|
// CJK Unified Ideographs Extension C
|
||||||
|
//(0x2A700, 0x2B73F),
|
||||||
|
// CJK Unified Ideographs Extension D
|
||||||
|
//(0x2B740, 0x2B81F),
|
||||||
|
// CJK Unified Ideographs Extension E
|
||||||
|
//(0x2B820, 0x2CEAF),
|
||||||
|
// CJK Unified Ideographs Extension F
|
||||||
|
//(0x2CEB0, 0x2EBEF),
|
||||||
|
// CJK Unified Ideographs Extension I
|
||||||
|
//(0x2EBF0, 0x2EE5F),
|
||||||
|
// CJK Compatibility Ideographs Supplement
|
||||||
|
//(0x2F800, 0x2FA1F),
|
||||||
|
// CJK Unified Ideographs Extension G
|
||||||
|
//(0x30000, 0x3134F),
|
||||||
|
// CJK Unified Ideographs Extension H
|
||||||
|
//(0x31350, 0x323AF),
|
||||||
|
// Tags
|
||||||
|
//(0xE0000, 0xE007F),
|
||||||
|
// Variation Selectors Supplement
|
||||||
|
//(0xE0100, 0xE01EF),
|
||||||
|
// Supplementary Private Use Area-A
|
||||||
|
//(0xF0000, 0xFFFFF),
|
||||||
|
// Supplementary Private Use Area-B
|
||||||
|
//(0x100000, 0x10FFFF),
|
||||||
|
];
|
19
src/main.rs
19
src/main.rs
|
@ -1,21 +1,15 @@
|
||||||
use std::io::Write;
|
mod blocks;
|
||||||
|
|
||||||
fn main() -> std::io::Result<()> {
|
fn main() {
|
||||||
let file = std::fs::OpenOptions::new()
|
for block in blocks::BLOCKS {
|
||||||
.write(true)
|
for i in (block.0)..(block.1) {
|
||||||
.create(true)
|
|
||||||
.open("all_chars.txt")?;
|
|
||||||
|
|
||||||
let mut writer = std::io::BufWriter::new(file);
|
|
||||||
|
|
||||||
for i in 0x0..(0x10FFFF + 1) {
|
|
||||||
match char::from_u32(i) {
|
match char::from_u32(i) {
|
||||||
Some(ch) => {
|
Some(ch) => {
|
||||||
if ch.is_control() {
|
if ch.is_control() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if let Some(name) = charname::get_name_checked(i) {
|
if let Some(name) = charname::get_name_checked(i) {
|
||||||
write!(writer, "U+{:04X}\t{}\t{}\n", i, ch, name)?;
|
println!("U+{:04X}\t{}\t{}", i, ch, name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
@ -23,6 +17,5 @@ fn main() -> std::io::Result<()> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue