◐ Shell
reader mode source ↗
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
File filter
Conversations
Jump to
Diff view
Apply and reload
Show whitespace
Diff view
Apply and reload
1 change: 0 additions & 1 deletion Lib/test/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,7 +792,6 @@ def test_isdecimal(self):
for ch in ['\U0001D7F6', '\U00011066', '\U000104A0']:
self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch))

@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: False != True
def test_isdigit(self):
super().test_isdigit()
self.checkequalnofix(True, '\u2460', 'isdigit')
Expand Down
9 changes: 5 additions & 4 deletions crates/sre_engine/src/string.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use icu_properties::props::{CanonicalCombiningClass, EnumeratedProperty};
use rustpython_wtf8::Wtf8;

#[derive(Debug, Clone, Copy)]
Expand Down Expand Up @@ -444,9 +444,10 @@ pub(crate) const fn is_uni_linebreak(ch: u32) -> bool {
pub(crate) fn is_uni_alnum(ch: u32) -> bool {
// TODO: check with cpython
char::try_from(ch)
.map(|x| {
x.is_alphanumeric()
&& CanonicalCombiningClass::for_char(x) == CanonicalCombiningClass::NotReordered
})
.unwrap_or(false)
}
Expand Down
27 changes: 18 additions & 9 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
@@ -45,8 +45,8 @@ use rustpython_common::{
};

use icu_properties::props::{
BidiClass, BinaryProperty, CanonicalCombiningClass, EnumeratedProperty, GeneralCategory,
XidContinue, XidStart,
};
use unicode_casing::CharExt;

Expand Down Expand Up @@ -949,23 +949,30 @@ impl PyStr {
fn isalnum(&self) -> bool {
!self.data.is_empty()
&& self.char_all(|c| {
c.is_alphanumeric()
&& CanonicalCombiningClass::for_char(c) == CanonicalCombiningClass::NotReordered
})
}

#[pymethod]
fn isnumeric(&self) -> bool {
!self.data.is_empty() && self.char_all(char::is_numeric)
}

#[pymethod]
fn isdigit(&self) -> bool {
// python's isdigit also checks if exponents are digits, these are the unicode codepoints for exponents
!self.data.is_empty()
&& self.char_all(|c| {
c.is_ascii_digit()
|| matches!(c, '⁰' | '¹' | '²' | '³' | '⁴' | '⁵' | '⁶' | '⁷' | '⁸' | '⁹')
})
}

Expand Down Expand Up @@ -1064,7 +1071,9 @@ impl PyStr {

#[pymethod]
fn isalpha(&self) -> bool {
!self.data.is_empty() && self.char_all(char::is_alphabetic)
}

#[pymethod]
Expand Down
13 changes: 13 additions & 0 deletions extra_tests/snippets/builtin_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,29 @@
assert "\u1c89".istitle()
# assert "DZ".title() == "Dz"
assert a.isalpha()

# Combining characters differ slightly between Rust and Python
assert "\u006e".isalnum()
assert not "\u0303".isalnum()
assert not "\u006e\u0303".isalnum()
assert "\u00f1".isalnum()
assert not "\u0345".isalnum()
for raw in range(0x0363, 0x036F):
assert not chr(raw).isalnum()

s = "1 2 3"
assert s.split(" ", 1) == ["1", "2 3"]
assert s.rsplit(" ", 1) == ["1 2", "3"]
Expand Down
Loading
Toggle all file notes Toggle all file annotations