From 206d4840ba57d466c9923997d846828ccfa409d2 Mon Sep 17 00:00:00 2001 From: Noah Swerhun Date: Fri, 8 Mar 2024 14:57:28 -0600 Subject: [PATCH] initial commit. feature complete --- .gitignore | 2 ++ Cargo.lock | 79 ++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 9 +++++ README.md | 4 +++ filter_and_format.sh | 3 ++ src/main.rs | 28 ++++++++++++++++ 6 files changed, 125 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 filter_and_format.sh create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eed3cc5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +*.txt diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..2ed5f04 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,79 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "charname" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae20be7f5840b66fb8ffe9750ff963eed482984934484815a49dc8855ca80e1" +dependencies = [ + "phf", + "phf_codegen", +] + +[[package]] +name = "chars" +version = "0.1.0" +dependencies = [ + "charname", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" 
+dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..36526bf --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "chars" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +charname = "0.2.15" diff --git a/README.md b/README.md new file mode 100644 index 0000000..9b07330 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +Generate a file listing every named, non-control Unicode code point, its glyph, and its name. + +First do `cargo run`, then `sh filter_and_format.sh`. Your final file will be +called `formatted.txt`. Do what you want with it. 
diff --git a/filter_and_format.sh b/filter_and_format.sh
new file mode 100644
index 0000000..e92181a
--- /dev/null
+++ b/filter_and_format.sh
@@ -0,0 +1,3 @@
+perl -ne 'if (/Private Use/i) {next;} else {print;}' all_chars.txt > filtered.txt
+
+perl -F'\t' -ae 'chomp @F; print("$F[0] $F[1] ", ($F[2] =~ s/\b(.)(.*?)\b/\U$1\L$2/gr), "\n")' filtered.txt > formatted.txt
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..36baf09
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,28 @@
+use std::io::Write;
+
+/// File the character table is written to (read by `filter_and_format.sh`).
+const OUTPUT_PATH: &str = "all_chars.txt";
+
+/// Writes one `U+XXXX<TAB>glyph<TAB>name` line to `OUTPUT_PATH` for every
+/// non-control Unicode scalar value that `charname` has a name for.
+///
+/// # Errors
+///
+/// Returns any I/O error raised while creating, writing or flushing the file.
+fn main() -> std::io::Result<()> {
+    // `File::create` truncates, so a rerun cannot leave stale trailing bytes
+    // behind the way `OpenOptions` without `.truncate(true)` could.
+    let file = std::fs::File::create(OUTPUT_PATH)?;
+    let mut writer = std::io::BufWriter::new(file);
+
+    // A `char` range visits only valid scalar values (surrogates are skipped).
+    for ch in ('\0'..=char::MAX).filter(|ch| !ch.is_control()) {
+        let code = u32::from(ch);
+        if let Some(name) = charname::get_name_checked(code) {
+            writeln!(writer, "U+{:04X}\t{}\t{}", code, ch, name)?;
+        }
+    }
+
+    // Flush explicitly: `BufWriter`'s `Drop` silently discards write errors.
+    writer.flush()
+}