Files
adler32
ahash
aho_corasick
alga
alga_derive
alsa_sys
amethyst
amethyst_animation
amethyst_assets
amethyst_audio
amethyst_config
amethyst_controls
amethyst_core
amethyst_derive
amethyst_error
amethyst_input
amethyst_locale
amethyst_network
amethyst_rendy
amethyst_ui
amethyst_utils
amethyst_window
andrew
approx
arrayvec
ash
atom
atty
backtrace
backtrace_sys
base64
bincode
bitflags
byteorder
bytes
c2_chacha
cfg_if
cgmath
chrono
claxon
clipboard
color_quant
colored
colorful
const_random
const_random_macro
cookie
cookie_store
cpal
crc
crc32fast
crossbeam_channel
crossbeam_deque
crossbeam_epoch
crossbeam_queue
crossbeam_utils
ctor
debugid
deflate
derivative
derive_new
dirs
dlib
downcast_rs
dtoa
edit_distance
either
encoding_rs
env_logger
erased_serde
err_derive
error_chain
euclid
euclid_macros
expat_sys
failure
failure_derive
fern
flate2
float_ord
fluent
fluent_bundle
fluent_locale
fluent_syntax
fnv
font_kit
fontconfig
fontconfig_sys
foreign_types
foreign_types_shared
freetype
freetype_sys
futures
futures_cpupool
fxhash
generic_array
genmesh
getrandom
getset
gfx_backend_vulkan
gfx_hal
ghost
gif
glsl_layout
glsl_layout_derive
glyph_brush
glyph_brush_layout
h2
hashbrown
heck
hibitset
hostname
hound
http
http_body
httparse
httpdate
humantime
hyper
hyper_tls
idna
im
image
indexmap
inflate
intl_pluralrules
inventory
inventory_impl
iovec
itertools
itoa
jpeg_decoder
laminar
lazy_static
lewton
lexical
lexical_core
libc
libloading
libm
line_drawing
linked_hash_map
lock_api
log
lyon_geom
lyon_path
lzw
matches
matrixmultiply
maybe_uninit
memchr
memmap
memoffset
mime
mime_guess
minimp3
minimp3_sys
miniz_oxide
mint
minterpolate
mio
mopa
nalgebra
base
geometry
linalg
native_tls
net2
nix
nodrop
num
num_bigint
num_complex
num_cpus
num_derive
num_integer
num_iter
num_rational
num_traits
objekt
ogg
openssl
openssl_probe
openssl_sys
ordered_float
owning_ref
palette
palette_derive
parking_lot
parking_lot_core
paste
paste_impl
percent_encoding
phf
phf_shared
png
ppv_lite86
proc_macro2
proc_macro_hack
proc_macro_roids
publicsuffix
quick_error
quickcheck
quote
rand
rand_chacha
rand_core
rand_hc
rand_isaac
rand_jitter
rand_os
rand_pcg
rand_xorshift
rawpointer
rayon
rayon_core
regex
regex_syntax
relevant
rendy
rendy_chain
rendy_command
rendy_descriptor
rendy_factory
rendy_frame
rendy_graph
rendy_memory
rendy_mesh
rendy_resource
rendy_shader
rendy_texture
rendy_util
rendy_wsi
rental
rental_impl
reqwest
rgb
rodio
ron
rustc_demangle
rustc_hash
rustc_version
rustc_version_runtime
rusttype
ryu
same_file
scoped_threadpool
scopeguard
semver
semver_parser
sentry
sentry_types
serde
serde_bytes
serde_derive
serde_json
serde_urlencoded
shared_library
shred
shred_derive
shrev
singularity_rs
siphasher
sized_chunks
slab
slice_deque
smallvec
smithay_client_toolkit
specs
specs_derive
specs_hierarchy
stable_deref_trait
stackvector
static_assertions
stb_truetype
string
syn
synstructure
termcolor
thread_local
thread_profiler
tiff
time
tokio
tokio_buf
tokio_current_thread
tokio_executor
tokio_io
tokio_reactor
tokio_sync
tokio_tcp
tokio_threadpool
tokio_timer
try_from
try_lock
tuple_utils
twox_hash
typenum
uname
unic_langid
unic_langid_impl
unic_langid_macros
unic_langid_macros_impl
unicase
unicode_bidi
unicode_normalization
unicode_segmentation
unicode_xid
unreachable
url
url_serde
uuid
void
walkdir
want
wavefront_obj
wayland_client
wayland_commons
wayland_protocols
wayland_sys
winapi
winconsole
winit
x11
x11_clipboard
x11_dl
xcb
xdg
xi_unicode
xml
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Unicode character composition and decomposition utilities
//! as described in
//! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
//!
//! ```rust
//! extern crate unicode_normalization;
//!
//! use unicode_normalization::char::compose;
//! use unicode_normalization::UnicodeNormalization;
//!
//! fn main() {
//!     assert_eq!(compose('A','\u{30a}'), Some('Å'));
//!
//!     let s = "ÅΩ";
//!     let c = s.nfc().collect::<String>();
//!     assert_eq!(c, "ÅΩ");
//! }
//! ```
//!
//! # crates.io
//!
//! You can use this package in your project by adding the following
//! to your `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! unicode-normalization = "0.1.8"
//! ```

#![deny(missing_docs, unsafe_code)]
#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
       html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]

extern crate smallvec;

pub use tables::UNICODE_VERSION;
pub use decompose::Decompositions;
pub use quick_check::{
    IsNormalized,
    is_nfc,
    is_nfc_quick,
    is_nfkc,
    is_nfkc_quick,
    is_nfc_stream_safe,
    is_nfc_stream_safe_quick,
    is_nfd,
    is_nfd_quick,
    is_nfkd,
    is_nfkd_quick,
    is_nfd_stream_safe,
    is_nfd_stream_safe_quick,
};
pub use recompose::Recompositions;
pub use stream_safe::StreamSafe;
use std::str::Chars;

mod decompose;
mod normalize;
mod recompose;
mod quick_check;
mod stream_safe;
mod tables;

#[cfg(test)]
mod test;
#[cfg(test)]
mod normalization_tests;

/// Character-level composition and decomposition helpers, together with
/// the per-character property lookups they rely on.
pub mod char {
    pub use normalize::{compose, decompose_canonical, decompose_compatible};

    /// Returns the canonical combining class of a character.
    pub use tables::canonical_combining_class;

    /// Reports whether a character is a combining mark
    /// (`General_Category=Mark`).
    pub use tables::is_combining_mark;
}


/// Methods for iterating over strings while applying Unicode normalizations
/// as described in
/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
pub trait UnicodeNormalization<I: Iterator<Item=char>> {
    /// Returns an iterator over the string in Unicode Normalization Form D
    /// (canonical decomposition).
    #[inline]
    fn nfd(self) -> Decompositions<I>;

    /// Returns an iterator over the string in Unicode Normalization Form KD
    /// (compatibility decomposition).
    #[inline]
    fn nfkd(self) -> Decompositions<I>;

    /// An Iterator over the string in Unicode Normalization Form C
    /// (canonical decomposition followed by canonical composition).
    #[inline]
    fn nfc(self) -> Recompositions<I>;

    /// An Iterator over the string in Unicode Normalization Form KC
    /// (compatibility decomposition followed by canonical composition).
    #[inline]
    fn nfkc(self) -> Recompositions<I>;

    /// An Iterator over the string with Conjoining Grapheme Joiner characters
    /// inserted according to the Stream-Safe Text Process (UAX15-D4)
    #[inline]
    fn stream_safe(self) -> StreamSafe<I>;
}

/// Normalization adaptors for string slices; each method works on the
/// slice's characters as yielded by `str::chars`.
impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
    /// Canonical decomposition (NFD) of this string's characters.
    #[inline]
    fn nfd(self) -> Decompositions<Chars<'a>> {
        let chars = self.chars();
        decompose::new_canonical(chars)
    }

    /// Compatibility decomposition (NFKD) of this string's characters.
    #[inline]
    fn nfkd(self) -> Decompositions<Chars<'a>> {
        let chars = self.chars();
        decompose::new_compatible(chars)
    }

    /// Canonical decomposition followed by canonical composition (NFC).
    #[inline]
    fn nfc(self) -> Recompositions<Chars<'a>> {
        let chars = self.chars();
        recompose::new_canonical(chars)
    }

    /// Compatibility decomposition followed by canonical composition (NFKC).
    #[inline]
    fn nfkc(self) -> Recompositions<Chars<'a>> {
        let chars = self.chars();
        recompose::new_compatible(chars)
    }

    /// Wraps this string's characters in the `StreamSafe` adaptor.
    #[inline]
    fn stream_safe(self) -> StreamSafe<Chars<'a>> {
        let chars = self.chars();
        StreamSafe::new(chars)
    }
}

/// Blanket implementation: any iterator of `char`s can be normalized
/// directly, with the iterator itself handed to the adaptor constructors.
impl<I> UnicodeNormalization<I> for I
where
    I: Iterator<Item = char>,
{
    /// Canonical decomposition (NFD) of the characters yielded by `self`.
    #[inline]
    fn nfd(self) -> Decompositions<I> {
        decompose::new_canonical(self)
    }

    /// Compatibility decomposition (NFKD) of the characters yielded by `self`.
    #[inline]
    fn nfkd(self) -> Decompositions<I> {
        decompose::new_compatible(self)
    }

    /// Canonical decomposition followed by canonical composition (NFC).
    #[inline]
    fn nfc(self) -> Recompositions<I> {
        recompose::new_canonical(self)
    }

    /// Compatibility decomposition followed by canonical composition (NFKC).
    #[inline]
    fn nfkc(self) -> Recompositions<I> {
        recompose::new_compatible(self)
    }

    /// Wraps `self` in the `StreamSafe` adaptor.
    #[inline]
    fn stream_safe(self) -> StreamSafe<I> {
        StreamSafe::new(self)
    }
}