Initial commit. Feature complete.

This commit is contained in:
Noah Swerhun 2024-03-08 14:57:28 -06:00
commit 206d4840ba
6 changed files with 125 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/target
*.txt

79
Cargo.lock generated Normal file
View file

@ -0,0 +1,79 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "charname"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ae20be7f5840b66fb8ffe9750ff963eed482984934484815a49dc8855ca80e1"
dependencies = [
"phf",
"phf_codegen",
]
[[package]]
name = "chars"
version = "0.1.0"
dependencies = [
"charname",
]
[[package]]
name = "phf"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
dependencies = [
"siphasher",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"

9
Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "chars"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
charname = "0.2.15"

4
README.md Normal file
View file

@ -0,0 +1,4 @@
Generate a text file listing every named Unicode code point together with its glyph and its name (control and private-use characters are left out).
First do `cargo run`, then `sh filter_and_format.sh`. Your final file will be
called `formatted.txt`. Do what you want with it.

3
filter_and_format.sh Normal file
View file

@ -0,0 +1,3 @@
#!/bin/sh
# Post-process all_chars.txt (tab-separated: code point, glyph, name) into
# formatted.txt. Run from the directory that contains all_chars.txt.

# Stop at the first failing step so a missing or unreadable input does not
# leave behind an empty or stale formatted.txt.
set -e

# Step 1: drop every line that mentions "Private Use" (case-insensitive).
perl -ne 'print unless /Private Use/i' all_chars.txt > filtered.txt

# Step 2: split each line on tabs, title-case every word of the name (third
# field) and join the three fields with single spaces.
# -n is given explicitly: only perl >= 5.20 lets -a imply the per-line loop.
perl -F'\t' -ane 'chomp @F; print("$F[0] $F[1] ", ($F[2] =~ s/\b(.)(.*?)\b/\U$1\L$2/gr), "\n")' filtered.txt > formatted.txt

28
src/main.rs Normal file
View file

@ -0,0 +1,28 @@
use std::io::Write;
/// Writes one line per named, non-control Unicode scalar value to `all_chars.txt`.
///
/// Each line has the form `U+XXXX<TAB>glyph<TAB>name`, where `XXXX` is the code
/// point in upper-case hexadecimal (zero-padded to at least four digits) and
/// `name` is whatever `charname::get_name_checked` reports for that code point.
/// Code points for which no name is reported are omitted.
///
/// # Errors
///
/// Returns any I/O error raised while creating, writing to, or flushing the
/// output file.
fn main() -> std::io::Result<()> {
    // `File::create` truncates an existing file, so a rerun can never leave
    // stale bytes from a previous, longer output at the end of the file.
    let file = std::fs::File::create("all_chars.txt")?;
    let mut writer = std::io::BufWriter::new(file);

    // A `char` range only yields valid scalar values, so the surrogate gap is
    // skipped without a separate `char::from_u32` check.
    for ch in '\0'..=char::MAX {
        if ch.is_control() {
            continue;
        }
        let code = u32::from(ch);
        if let Some(name) = charname::get_name_checked(code) {
            writeln!(writer, "U+{:04X}\t{}\t{}", code, ch, name)?;
        }
    }

    // Flush explicitly: dropping a `BufWriter` discards any error from the
    // final write, which would make a truncated file look like success.
    writer.flush()
}