◐ Shell
clean mode source ↗

unicode package - unicode - Go Packages

Package unicode provides data and functions to test some properties of Unicode code points.

Functions starting with "Is" can be used to inspect which range table a rune belongs to. Note that a rune may belong to more than one table.

package main

import (
	"fmt"
	"unicode"
)

// main walks a string of deliberately varied runes and, for each one, lists
// every unicode classification predicate that holds for it.
func main() {
	// Each predicate is paired with the line printed when it matches.
	// Table order is output order.
	checks := []struct {
		match func(rune) bool
		label string
	}{
		{unicode.IsControl, "\tis control rune"},
		{unicode.IsDigit, "\tis digit rune"},
		{unicode.IsGraphic, "\tis graphic rune"},
		{unicode.IsLetter, "\tis letter rune"},
		{unicode.IsLower, "\tis lower case rune"},
		{unicode.IsMark, "\tis mark rune"},
		{unicode.IsNumber, "\tis number rune"},
		{unicode.IsPrint, "\tis printable rune"},
		{func(r rune) bool { return !unicode.IsPrint(r) }, "\tis not printable rune"},
		{unicode.IsPunct, "\tis punct rune"},
		{unicode.IsSpace, "\tis space rune"},
		{unicode.IsSymbol, "\tis symbol rune"},
		{unicode.IsTitle, "\tis title case rune"},
		{unicode.IsUpper, "\tis upper case rune"},
	}

	// sample mixes runes of many different kinds.
	const sample = "\b5Ὂg̀9! ℃ᾭG"
	for _, r := range sample {
		fmt.Printf("For %q:\n", r)
		for _, chk := range checks {
			if chk.match(r) {
				fmt.Println(chk.label)
			}
		}
	}
}
Output:
For '\b':
	is control rune
	is not printable rune
For '5':
	is digit rune
	is graphic rune
	is number rune
	is printable rune
For 'Ὂ':
	is graphic rune
	is letter rune
	is printable rune
	is upper case rune
For 'g':
	is graphic rune
	is letter rune
	is lower case rune
	is printable rune
For '̀':
	is graphic rune
	is mark rune
	is printable rune
For '9':
	is digit rune
	is graphic rune
	is number rune
	is printable rune
For '!':
	is graphic rune
	is printable rune
	is punct rune
For ' ':
	is graphic rune
	is printable rune
	is space rune
For '℃':
	is graphic rune
	is printable rune
	is symbol rune
For 'ᾭ':
	is graphic rune
	is letter rune
	is printable rune
	is title case rune
For 'G':
	is graphic rune
	is letter rune
	is printable rune
	is upper case rune

View Source

// Boundary and sentinel code points.
const (
	MaxRune         = '\U0010FFFF' // Maximum valid Unicode code point.
	ReplacementChar = '\uFFFD'     // Represents invalid code points.
	MaxASCII        = '\u007F'     // Maximum ASCII value.
	MaxLatin1       = '\u00FF'     // Maximum Latin-1 value.
)

Indices into the Delta arrays inside CaseRanges for case mapping.

If the Delta field of a CaseRange is UpperLower, it means this CaseRange represents a sequence of the form (say) Upper Lower Upper Lower.

Version is the Unicode edition from which the tables are derived.

View Source

// General-category range tables. Short names are the Unicode category
// abbreviations; the spelled-out names (Digit, Letter, Lower, ...) are bound
// to the same underlying table as the abbreviation noted beside them.
var (
	Cc     = _Cc // Category Cc (Control).
	Cf     = _Cf // Category Cf (Format).
	Cn     = _Cn // Category Cn (Unassigned).
	Co     = _Co // Category Co (Private_Use).
	Cs     = _Cs // Category Cs (Surrogate).
	Digit  = _Nd // Same table as Nd.
	Nd     = _Nd // Category Nd (Decimal_Number).
	LC     = _LC // Category LC (Cased_Letter).
	Letter = _L  // Same table as L.
	L      = _L  // Category L (Letter).
	Lm     = _Lm // Category Lm (Modifier_Letter).
	Lo     = _Lo // Category Lo (Other_Letter).
	Lower  = _Ll // Same table as Ll.
	Ll     = _Ll // Category Ll (Lowercase_Letter).
	Mark   = _M  // Same table as M.
	M      = _M  // Category M (Mark).
	Mc     = _Mc // Category Mc (Spacing_Mark).
	Me     = _Me // Category Me (Enclosing_Mark).
	Mn     = _Mn // Category Mn (Nonspacing_Mark).
	Nl     = _Nl // Category Nl (Letter_Number).
	No     = _No // Category No (Other_Number).
	Number = _N  // Same table as N.
	N      = _N  // Category N (Number).
	Other  = _C  // Same table as C.
	C      = _C  // Category C (Other).
	Pc     = _Pc // Category Pc (Connector_Punctuation).
	Pd     = _Pd // Category Pd (Dash_Punctuation).
	Pe     = _Pe // Category Pe (Close_Punctuation).
	Pf     = _Pf // Category Pf (Final_Punctuation).
	Pi     = _Pi // Category Pi (Initial_Punctuation).
	Po     = _Po // Category Po (Other_Punctuation).
	Ps     = _Ps // Category Ps (Open_Punctuation).
	Punct  = _P  // Same table as P.
	P      = _P  // Category P (Punctuation).
	Sc     = _Sc // Category Sc (Currency_Symbol).
	Sk     = _Sk // Category Sk (Modifier_Symbol).
	Sm     = _Sm // Category Sm (Math_Symbol).
	So     = _So // Category So (Other_Symbol).
	Space  = _Z  // Same table as Z.
	Z      = _Z  // Category Z (Separator).
	Symbol = _S  // Same table as S.
	S      = _S  // Category S (Symbol).
	Title  = _Lt // Same table as Lt.
	Lt     = _Lt // Category Lt (Titlecase_Letter).
	Upper  = _Lu // Same table as Lu.
	Lu     = _Lu // Category Lu (Uppercase_Letter).
	Zl     = _Zl // Category Zl (Line_Separator).
	Zp     = _Zp // Category Zp (Paragraph_Separator).
	Zs     = _Zs // Category Zs (Space_Separator).
)

These variables have type *RangeTable.

View Source

// Script range tables, one exported variable per script name. Each variable
// is bound to the unexported table of the same name prefixed with "_".
var (
	Adlam                  = _Adlam
	Ahom                   = _Ahom
	Anatolian_Hieroglyphs  = _Anatolian_Hieroglyphs
	Arabic                 = _Arabic
	Armenian               = _Armenian
	Avestan                = _Avestan
	Balinese               = _Balinese
	Bamum                  = _Bamum
	Bassa_Vah              = _Bassa_Vah
	Batak                  = _Batak
	Bengali                = _Bengali
	Bhaiksuki              = _Bhaiksuki
	Bopomofo               = _Bopomofo
	Brahmi                 = _Brahmi
	Braille                = _Braille
	Buginese               = _Buginese
	Buhid                  = _Buhid
	Canadian_Aboriginal    = _Canadian_Aboriginal
	Carian                 = _Carian
	Caucasian_Albanian     = _Caucasian_Albanian
	Chakma                 = _Chakma
	Cham                   = _Cham
	Cherokee               = _Cherokee
	Chorasmian             = _Chorasmian
	Common                 = _Common
	Coptic                 = _Coptic
	Cuneiform              = _Cuneiform
	Cypriot                = _Cypriot
	Cypro_Minoan           = _Cypro_Minoan
	Cyrillic               = _Cyrillic
	Deseret                = _Deseret
	Devanagari             = _Devanagari
	Dives_Akuru            = _Dives_Akuru
	Dogra                  = _Dogra
	Duployan               = _Duployan
	Egyptian_Hieroglyphs   = _Egyptian_Hieroglyphs
	Elbasan                = _Elbasan
	Elymaic                = _Elymaic
	Ethiopic               = _Ethiopic
	Georgian               = _Georgian
	Glagolitic             = _Glagolitic
	Gothic                 = _Gothic
	Grantha                = _Grantha
	Greek                  = _Greek
	Gujarati               = _Gujarati
	Gunjala_Gondi          = _Gunjala_Gondi
	Gurmukhi               = _Gurmukhi
	Han                    = _Han
	Hangul                 = _Hangul
	Hanifi_Rohingya        = _Hanifi_Rohingya
	Hanunoo                = _Hanunoo
	Hatran                 = _Hatran
	Hebrew                 = _Hebrew
	Hiragana               = _Hiragana
	Imperial_Aramaic       = _Imperial_Aramaic
	Inherited              = _Inherited
	Inscriptional_Pahlavi  = _Inscriptional_Pahlavi
	Inscriptional_Parthian = _Inscriptional_Parthian
	Javanese               = _Javanese
	Kaithi                 = _Kaithi
	Kannada                = _Kannada
	Katakana               = _Katakana
	Kawi                   = _Kawi
	Kayah_Li               = _Kayah_Li
	Kharoshthi             = _Kharoshthi
	Khitan_Small_Script    = _Khitan_Small_Script
	Khmer                  = _Khmer
	Khojki                 = _Khojki
	Khudawadi              = _Khudawadi
	Lao                    = _Lao
	Latin                  = _Latin
	Lepcha                 = _Lepcha
	Limbu                  = _Limbu
	Linear_A               = _Linear_A
	Linear_B               = _Linear_B
	Lisu                   = _Lisu
	Lycian                 = _Lycian
	Lydian                 = _Lydian
	Mahajani               = _Mahajani
	Makasar                = _Makasar
	Malayalam              = _Malayalam
	Mandaic                = _Mandaic
	Manichaean             = _Manichaean
	Marchen                = _Marchen
	Masaram_Gondi          = _Masaram_Gondi
	Medefaidrin            = _Medefaidrin
	Meetei_Mayek           = _Meetei_Mayek
	Mende_Kikakui          = _Mende_Kikakui
	Meroitic_Cursive       = _Meroitic_Cursive
	Meroitic_Hieroglyphs   = _Meroitic_Hieroglyphs
	Miao                   = _Miao
	Modi                   = _Modi
	Mongolian              = _Mongolian
	Mro                    = _Mro
	Multani                = _Multani
	Myanmar                = _Myanmar
	Nabataean              = _Nabataean
	Nag_Mundari            = _Nag_Mundari
	Nandinagari            = _Nandinagari
	New_Tai_Lue            = _New_Tai_Lue
	Newa                   = _Newa
	Nko                    = _Nko
	Nushu                  = _Nushu
	Nyiakeng_Puachue_Hmong = _Nyiakeng_Puachue_Hmong
	Ogham                  = _Ogham
	Ol_Chiki               = _Ol_Chiki
	Old_Hungarian          = _Old_Hungarian
	Old_Italic             = _Old_Italic
	Old_North_Arabian      = _Old_North_Arabian
	Old_Permic             = _Old_Permic
	Old_Persian            = _Old_Persian
	Old_Sogdian            = _Old_Sogdian
	Old_South_Arabian      = _Old_South_Arabian
	Old_Turkic             = _Old_Turkic
	Old_Uyghur             = _Old_Uyghur
	Oriya                  = _Oriya
	Osage                  = _Osage
	Osmanya                = _Osmanya
	Pahawh_Hmong           = _Pahawh_Hmong
	Palmyrene              = _Palmyrene
	Pau_Cin_Hau            = _Pau_Cin_Hau
	Phags_Pa               = _Phags_Pa
	Phoenician             = _Phoenician
	Psalter_Pahlavi        = _Psalter_Pahlavi
	Rejang                 = _Rejang
	Runic                  = _Runic
	Samaritan              = _Samaritan
	Saurashtra             = _Saurashtra
	Sharada                = _Sharada
	Shavian                = _Shavian
	Siddham                = _Siddham
	SignWriting            = _SignWriting
	Sinhala                = _Sinhala
	Sogdian                = _Sogdian
	Sora_Sompeng           = _Sora_Sompeng
	Soyombo                = _Soyombo
	Sundanese              = _Sundanese
	Syloti_Nagri           = _Syloti_Nagri
	Syriac                 = _Syriac
	Tagalog                = _Tagalog
	Tagbanwa               = _Tagbanwa
	Tai_Le                 = _Tai_Le
	Tai_Tham               = _Tai_Tham
	Tai_Viet               = _Tai_Viet
	Takri                  = _Takri
	Tamil                  = _Tamil
	Tangsa                 = _Tangsa
	Tangut                 = _Tangut
	Telugu                 = _Telugu
	Thaana                 = _Thaana
	Thai                   = _Thai
	Tibetan                = _Tibetan
	Tifinagh               = _Tifinagh
	Tirhuta                = _Tirhuta
	Toto                   = _Toto
	Ugaritic               = _Ugaritic
	Vai                    = _Vai
	Vithkuqi               = _Vithkuqi
	Wancho                 = _Wancho
	Warang_Citi            = _Warang_Citi
	Yezidi                 = _Yezidi
	Yi                     = _Yi
	Zanabazar_Square       = _Zanabazar_Square
)

These variables have type *RangeTable.

View Source

// Property range tables, one exported variable per property name. Each
// variable is bound to the unexported table of the same name prefixed with
// "_", except STerm, which shares the Sentence_Terminal table.
var (
	ASCII_Hex_Digit                    = _ASCII_Hex_Digit
	Bidi_Control                       = _Bidi_Control
	Dash                               = _Dash
	Deprecated                         = _Deprecated
	Diacritic                          = _Diacritic
	Extender                           = _Extender
	Hex_Digit                          = _Hex_Digit
	Hyphen                             = _Hyphen
	IDS_Binary_Operator                = _IDS_Binary_Operator
	IDS_Trinary_Operator               = _IDS_Trinary_Operator
	Ideographic                        = _Ideographic
	Join_Control                       = _Join_Control
	Logical_Order_Exception            = _Logical_Order_Exception
	Noncharacter_Code_Point            = _Noncharacter_Code_Point
	Other_Alphabetic                   = _Other_Alphabetic
	Other_Default_Ignorable_Code_Point = _Other_Default_Ignorable_Code_Point
	Other_Grapheme_Extend              = _Other_Grapheme_Extend
	Other_ID_Continue                  = _Other_ID_Continue
	Other_ID_Start                     = _Other_ID_Start
	Other_Lowercase                    = _Other_Lowercase
	Other_Math                         = _Other_Math
	Other_Uppercase                    = _Other_Uppercase
	Pattern_Syntax                     = _Pattern_Syntax
	Pattern_White_Space                = _Pattern_White_Space
	Prepended_Concatenation_Mark       = _Prepended_Concatenation_Mark
	Quotation_Mark                     = _Quotation_Mark
	Radical                            = _Radical
	Regional_Indicator                 = _Regional_Indicator
	STerm                              = _Sentence_Terminal // Same table as Sentence_Terminal.
	Sentence_Terminal                  = _Sentence_Terminal
	Soft_Dotted                        = _Soft_Dotted
	Terminal_Punctuation               = _Terminal_Punctuation
	Unified_Ideograph                  = _Unified_Ideograph
	Variation_Selector                 = _Variation_Selector
	White_Space                        = _White_Space
)

These variables have type *RangeTable.

CaseRanges is the table describing case mappings for all letters with non-self mappings.

View Source

// Categories is the set of Unicode category tables, keyed by the category
// abbreviation (for example "Lu" or "Nd").
var Categories = map[string]*RangeTable{
	"C":  C,
	"Cc": Cc,
	"Cf": Cf,
	"Cn": Cn,
	"Co": Co,
	"Cs": Cs,
	"L":  L,
	"LC": LC,
	"Ll": Ll,
	"Lm": Lm,
	"Lo": Lo,
	"Lt": Lt,
	"Lu": Lu,
	"M":  M,
	"Mc": Mc,
	"Me": Me,
	"Mn": Mn,
	"N":  N,
	"Nd": Nd,
	"Nl": Nl,
	"No": No,
	"P":  P,
	"Pc": Pc,
	"Pd": Pd,
	"Pe": Pe,
	"Pf": Pf,
	"Pi": Pi,
	"Po": Po,
	"Ps": Ps,
	"S":  S,
	"Sc": Sc,
	"Sk": Sk,
	"Sm": Sm,
	"So": So,
	"Z":  Z,
	"Zl": Zl,
	"Zp": Zp,
	"Zs": Zs,
}

Categories is the set of Unicode category tables.

View Source

// CategoryAliases maps category aliases to standard category names. The
// values are the abbreviations used as keys in the Categories table; several
// aliases may map to the same abbreviation (for example "Combining_Mark" and
// "Mark" both map to "M").
var CategoryAliases = map[string]string{
	"Cased_Letter":          "LC",
	"Close_Punctuation":     "Pe",
	"Combining_Mark":        "M",
	"Connector_Punctuation": "Pc",
	"Control":               "Cc",
	"Currency_Symbol":       "Sc",
	"Dash_Punctuation":      "Pd",
	"Decimal_Number":        "Nd",
	"Enclosing_Mark":        "Me",
	"Final_Punctuation":     "Pf",
	"Format":                "Cf",
	"Initial_Punctuation":   "Pi",
	"Letter":                "L",
	"Letter_Number":         "Nl",
	"Line_Separator":        "Zl",
	"Lowercase_Letter":      "Ll",
	"Mark":                  "M",
	"Math_Symbol":           "Sm",
	"Modifier_Letter":       "Lm",
	"Modifier_Symbol":       "Sk",
	"Nonspacing_Mark":       "Mn",
	"Number":                "N",
	"Open_Punctuation":      "Ps",
	"Other":                 "C",
	"Other_Letter":          "Lo",
	"Other_Number":          "No",
	"Other_Punctuation":     "Po",
	"Other_Symbol":          "So",
	"Paragraph_Separator":   "Zp",
	"Private_Use":           "Co",
	"Punctuation":           "P",
	"Separator":             "Z",
	"Space_Separator":       "Zs",
	"Spacing_Mark":          "Mc",
	"Surrogate":             "Cs",
	"Symbol":                "S",
	"Titlecase_Letter":      "Lt",
	"Unassigned":            "Cn",
	"Uppercase_Letter":      "Lu",
	"cntrl":                 "Cc",
	"digit":                 "Nd",
	"punct":                 "P",
}

CategoryAliases maps category aliases to standard category names.

View Source

// FoldCategory maps a category name to a table of code points outside the
// category that are equivalent under simple case folding to code points
// inside the category. If there is no entry for a category name, there are
// no such points.
var FoldCategory = map[string]*RangeTable{
	"L":  foldL,
	"Ll": foldLl,
	"Lt": foldLt,
	"Lu": foldLu,
	"M":  foldM,
	"Mn": foldMn,
}

FoldCategory maps a category name to a table of code points outside the category that are equivalent under simple case folding to code points inside the category. If there is no entry for a category name, there are no such points.

View Source

// FoldScript maps a script name to a table of code points outside the script
// that are equivalent under simple case folding to code points inside the
// script. If there is no entry for a script name, there are no such points.
var FoldScript = map[string]*RangeTable{
	"Common":    foldCommon,
	"Greek":     foldGreek,
	"Inherited": foldInherited,
}

FoldScript maps a script name to a table of code points outside the script that are equivalent under simple case folding to code points inside the script. If there is no entry for a script name, there are no such points.

GraphicRanges defines the set of graphic characters according to Unicode.

PrintRanges defines the set of printable characters according to Go. ASCII space, U+0020, is handled separately.

View Source

// Properties is the set of Unicode property tables, keyed by property name.
// "STerm" and "Sentence_Terminal" refer to the same table.
var Properties = map[string]*RangeTable{
	"ASCII_Hex_Digit":                    ASCII_Hex_Digit,
	"Bidi_Control":                       Bidi_Control,
	"Dash":                               Dash,
	"Deprecated":                         Deprecated,
	"Diacritic":                          Diacritic,
	"Extender":                           Extender,
	"Hex_Digit":                          Hex_Digit,
	"Hyphen":                             Hyphen,
	"IDS_Binary_Operator":                IDS_Binary_Operator,
	"IDS_Trinary_Operator":               IDS_Trinary_Operator,
	"Ideographic":                        Ideographic,
	"Join_Control":                       Join_Control,
	"Logical_Order_Exception":            Logical_Order_Exception,
	"Noncharacter_Code_Point":            Noncharacter_Code_Point,
	"Other_Alphabetic":                   Other_Alphabetic,
	"Other_Default_Ignorable_Code_Point": Other_Default_Ignorable_Code_Point,
	"Other_Grapheme_Extend":              Other_Grapheme_Extend,
	"Other_ID_Continue":                  Other_ID_Continue,
	"Other_ID_Start":                     Other_ID_Start,
	"Other_Lowercase":                    Other_Lowercase,
	"Other_Math":                         Other_Math,
	"Other_Uppercase":                    Other_Uppercase,
	"Pattern_Syntax":                     Pattern_Syntax,
	"Pattern_White_Space":                Pattern_White_Space,
	"Prepended_Concatenation_Mark":       Prepended_Concatenation_Mark,
	"Quotation_Mark":                     Quotation_Mark,
	"Radical":                            Radical,
	"Regional_Indicator":                 Regional_Indicator,
	"Sentence_Terminal":                  Sentence_Terminal,
	"STerm":                              Sentence_Terminal,
	"Soft_Dotted":                        Soft_Dotted,
	"Terminal_Punctuation":               Terminal_Punctuation,
	"Unified_Ideograph":                  Unified_Ideograph,
	"Variation_Selector":                 Variation_Selector,
	"White_Space":                        White_Space,
}

Properties is the set of Unicode property tables.

Scripts is the set of Unicode script tables.

In reports whether the rune is a member of one of the ranges.

Is reports whether the rune is in the specified table of ranges.

IsControl reports whether the rune is a control character. The C (Other) Unicode category includes more code points such as surrogates; use Is(C, r) to test for them.

IsDigit reports whether the rune is a decimal digit.

package main

import (
	"fmt"
	"unicode"
)

// main reports, one result per line, whether each sample rune is a decimal digit.
func main() {
	// A non-ASCII decimal digit followed by a Latin letter.
	for _, r := range []rune{'৩', 'A'} {
		fmt.Printf("%t\n", unicode.IsDigit(r))
	}
}
Output:
true
false

IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such characters include letters, marks, numbers, punctuation, symbols, and spaces, from categories L, M, N, P, S, Zs.

IsLetter reports whether the rune is a letter (category L).

package main

import (
	"fmt"
	"unicode"
)

// main reports, one result per line, whether each sample rune is a letter.
func main() {
	// A letter followed by a digit.
	for _, r := range []rune{'A', '7'} {
		fmt.Printf("%t\n", unicode.IsLetter(r))
	}
}
Output:
true
false

IsLower reports whether the rune is a lower case letter.

package main

import (
	"fmt"
	"unicode"
)

// main reports, one result per line, whether each sample rune is lower case.
func main() {
	// The same letter in lower and upper case.
	for _, r := range []rune{'a', 'A'} {
		fmt.Printf("%t\n", unicode.IsLower(r))
	}
}
Output:
true
false

IsMark reports whether the rune is a mark character (category M).

IsNumber reports whether the rune is a number (category N).

package main

import (
	"fmt"
	"unicode"
)

// main reports, one result per line, whether each sample rune is a number.
func main() {
	// A non-decimal numeric rune followed by a Latin letter.
	for _, r := range []rune{'Ⅷ', 'A'} {
		fmt.Printf("%t\n", unicode.IsNumber(r))
	}
}
Output:
true
false

IsOneOf reports whether the rune is a member of one of the ranges. The function "In" provides a nicer signature and should be used in preference to IsOneOf.

IsPrint reports whether the rune is defined as printable by Go. Such characters include letters, marks, numbers, punctuation, and symbols (categories L, M, N, P, S), plus the ASCII space character. This categorization is the same as IsGraphic except that the only spacing character is ASCII space, U+0020.

IsPunct reports whether the rune is a Unicode punctuation character (category P).

IsSpace reports whether the rune is a space character as defined by Unicode's White Space property; in the Latin-1 space this is

'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).

Other definitions of spacing characters are set by category Z and property Pattern_White_Space.

package main

import (
	"fmt"
	"unicode"
)

// main reports, one result per line, whether each sample rune is a space character.
func main() {
	// Three whitespace runes followed by a letter.
	samples := []rune{' ', '\n', '\t', 'a'}
	for _, r := range samples {
		fmt.Printf("%t\n", unicode.IsSpace(r))
	}
}
Output:
true
true
true
false

IsSymbol reports whether the rune is a symbolic character.

IsTitle reports whether the rune is a title case letter.

package main

import (
	"fmt"
	"unicode"
)

func main() {
	fmt.Printf("%t\n", unicode.IsTitle('Dž'))
	fmt.Printf("%t\n", unicode.IsTitle('a'))
}
Output:
true
false

IsUpper reports whether the rune is an upper case letter.

package main

import (
	"fmt"
	"unicode"
)

// main reports, one result per line, whether each sample rune is upper case.
func main() {
	// The same letter in upper and lower case.
	for _, r := range []rune{'A', 'a'} {
		fmt.Printf("%t\n", unicode.IsUpper(r))
	}
}
Output:
true
false

SimpleFold iterates over Unicode code points equivalent under the Unicode-defined simple case folding. Among the code points equivalent to r (including r itself), SimpleFold returns the smallest rune > r if one exists, or else the smallest rune >= 0. If r is not a valid Unicode code point, SimpleFold(r) returns r.

For example:

SimpleFold('A') = 'a'
SimpleFold('a') = 'A'

SimpleFold('K') = 'k'
SimpleFold('k') = '\u212A' (Kelvin symbol, K)
SimpleFold('\u212A') = 'K'

SimpleFold('1') = '1'

SimpleFold(-2) = -2
package main

import (
	"fmt"
	"unicode"
)

// main prints the SimpleFold successor of each sample rune in %#U form.
func main() {
	// Expected results, in order:
	//   'A'      -> 'a'
	//   'a'      -> 'A'
	//   'K'      -> 'k'
	//   'k'      -> '\u212A' (Kelvin symbol, K)
	//   '\u212A' -> 'K'
	//   '1'      -> '1'
	inputs := []rune{'A', 'a', 'K', 'k', '\u212A', '1'}
	for _, r := range inputs {
		fmt.Printf("%#U\n", unicode.SimpleFold(r))
	}
}
Output:
U+0061 'a'
U+0041 'A'
U+006B 'k'
U+212A 'K'
U+004B 'K'
U+0031 '1'

To maps the rune to the specified case: UpperCase, LowerCase, or TitleCase.

package main

import (
	"fmt"
	"unicode"
)

// main maps a lower-case and then an upper-case 'g' to each of the three
// cases and prints every result in %#U form.
func main() {
	// Target cases, in the order they are printed for each rune.
	targets := []int{unicode.UpperCase, unicode.LowerCase, unicode.TitleCase}

	for _, r := range []rune{'g', 'G'} {
		for _, target := range targets {
			fmt.Printf("%#U\n", unicode.To(target, r))
		}
	}
}
Output:
U+0047 'G'
U+0067 'g'
U+0047 'G'
U+0047 'G'
U+0067 'g'
U+0047 'G'

ToLower maps the rune to lower case.

package main

import (
	"fmt"
	"unicode"
)

// main prints the lower-case mapping of 'G' in %#U form.
func main() {
	lowered := unicode.ToLower('G')
	fmt.Printf("%#U\n", lowered)
}
Output:
U+0067 'g'

ToTitle maps the rune to title case.

package main

import (
	"fmt"
	"unicode"
)

// main prints the title-case mapping of 'g' in %#U form.
func main() {
	const lcG = 'g' // lower-case input
	titled := unicode.ToTitle(lcG)
	fmt.Printf("%#U\n", titled)
}
Output:
U+0047 'G'

ToUpper maps the rune to upper case.

package main

import (
	"fmt"
	"unicode"
)

// main prints the upper-case mapping of 'g' in %#U form.
func main() {
	const lcG = 'g' // lower-case input
	raised := unicode.ToUpper(lcG)
	fmt.Printf("%#U\n", raised)
}
Output:
U+0047 'G'

CaseRange represents a range of Unicode code points for simple (one code point to one code point) case conversion. The range runs from Lo to Hi inclusive, with a fixed stride of 1. Deltas are the number to add to the code point to reach the code point for a different case for that character. They may be negative. If zero, it means the character is in the corresponding case. There is a special case representing sequences of alternating corresponding Upper and Lower pairs. It appears with a fixed Delta of

{UpperLower, UpperLower, UpperLower}

The constant UpperLower has an otherwise impossible delta value.

Range16 represents a range of 16-bit Unicode code points. The range runs from Lo to Hi inclusive and has the specified stride.

Range32 represents a range of Unicode code points and is used when one or more of the values will not fit in 16 bits. The range runs from Lo to Hi inclusive and has the specified stride. Lo and Hi must always be >= 1<<16.

// RangeTable defines a set of Unicode code points by listing the ranges of
// code points within the set. The ranges are listed in two slices to save
// space: a slice of 16-bit ranges and a slice of 32-bit ranges. The two
// slices must be in sorted order and non-overlapping.
type RangeTable struct {
	R16         []Range16 // 16-bit ranges
	R32         []Range32 // ranges that do not fit in 16 bits; should contain only values >= 0x10000 (1<<16)
	LatinOffset int       // NOTE(review): presumably the number of R16 entries with Hi <= MaxLatin1; confirm against the package source
}

RangeTable defines a set of Unicode code points by listing the ranges of code points within the set. The ranges are listed in two slices to save space: a slice of 16-bit ranges and a slice of 32-bit ranges. The two slices must be in sorted order and non-overlapping. Also, R32 should contain only values >= 0x10000 (1<<16).

// SpecialCase represents language-specific case mappings such as Turkish.
// Methods of SpecialCase customize (by overriding) the standard mappings.
type SpecialCase []CaseRange

SpecialCase represents language-specific case mappings such as Turkish. Methods of SpecialCase customize (by overriding) the standard mappings.

package main

import (
	"fmt"
	"unicode"
)

// main applies the Turkish special-case mappings to a dotted lower-case 'i'
// and a dotted upper-case 'İ', printing the lower, title and upper forms of
// each in %#U form.
func main() {
	tr := unicode.TurkishCase

	// Conversions, in the order they are printed for each rune.
	conversions := []func(rune) rune{tr.ToLower, tr.ToTitle, tr.ToUpper}

	for _, r := range []rune{'i', 'İ'} {
		for _, convert := range conversions {
			fmt.Printf("%#U\n", convert(r))
		}
	}
}
Output:
U+0069 'i'
U+0130 'İ'
U+0130 'İ'
U+0069 'i'
U+0130 'İ'
U+0130 'İ'
// AzeriCase and TurkishCase are both initialized from the same _TurkishCase
// table, so the two apply identical special-case mappings.
var AzeriCase SpecialCase = _TurkishCase
var TurkishCase SpecialCase = _TurkishCase

ToLower maps the rune to lower case giving priority to the special mapping.

ToTitle maps the rune to title case giving priority to the special mapping.

ToUpper maps the rune to upper case giving priority to the special mapping.

  • There is no mechanism for full case folding, that is, for characters that involve multiple runes in the input or output.