Browse Source

Refactor SpotifyId

- perf:
  * base62 encoding is an order of magnitude faster (~20x);
  * base16/62 enc/dec and from_uri are several times faster (~2-20x);
  * Let FileId::to_base16() reuse the hex encoder (~20x);

- changes:
  * Add to_uri() method;
  * Make from_uri() error handling consistent;
  * Move audio type from string matching to a SpotifyAudioType factory (private);
  * Implement From/Into<&str> for SpotifyAudioType;
  * Add representation sizes as associated constants (private);

- cs/docs:
  * Add rudimentary docs for most public funcs;
  * Add trivial test cases for the codecs;
Michał Chojnacki 4 years ago
parent
commit
6b09836117
1 changed files with 360 additions and 64 deletions
  1. 360 64
      core/src/spotify_id.rs

+ 360 - 64
core/src/spotify_id.rs

@@ -8,6 +8,44 @@ pub enum SpotifyAudioType {
     NonPlayable,
 }
 
+impl SpotifyAudioType {
+    fn from_str(src: &str) -> SpotifyAudioType {
+        match src {
+            "track" => SpotifyAudioType::Track,
+            "episode" => SpotifyAudioType::Podcast,
+            _ => SpotifyAudioType::NonPlayable,
+        }
+    }
+
+    fn to_str(self) -> &'static str {
+        match self {
+            SpotifyAudioType::Track => "track",
+            SpotifyAudioType::Podcast => "episode",
+            SpotifyAudioType::NonPlayable => "unknown",
+        }
+    }
+
+    fn len(self) -> usize {
+        match self {
+            SpotifyAudioType::Track => 5,
+            SpotifyAudioType::Podcast => 7,
+            SpotifyAudioType::NonPlayable => 7,
+        }
+    }
+}
+
+impl std::convert::From<&str> for SpotifyAudioType {
+    fn from(v: &str) -> Self {
+        SpotifyAudioType::from_str(v)
+    }
+}
+
+impl std::convert::Into<&str> for SpotifyAudioType {
+    fn into(self) -> &'static str {
+        self.to_str()
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub struct SpotifyId {
     pub id: u128,
@@ -17,104 +55,184 @@ pub struct SpotifyId {
 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
 pub struct SpotifyIdError;
 
-const BASE62_DIGITS: &'static [u8] =
-    b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
-const BASE16_DIGITS: &'static [u8] = b"0123456789abcdef";
+const BASE62_DIGITS: &[u8; 62] = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+const BASE16_DIGITS: &[u8; 16] = b"0123456789abcdef";
 
 impl SpotifyId {
+    const SIZE: usize = 16;
+    const SIZE_BASE16: usize = 32;
+    const SIZE_BASE62: usize = 22;
+
     fn as_track(n: u128) -> SpotifyId {
         SpotifyId {
-            id: n.to_owned(),
+            id: n,
             audio_type: SpotifyAudioType::Track,
         }
     }
 
-    pub fn from_base16(id: &str) -> Result<SpotifyId, SpotifyIdError> {
-        let data = id.as_bytes();
+    /// Parses a base16 (hex) encoded [Spotify ID] into a `SpotifyId`.
+    ///
+    /// `src` is expected to be 32 bytes long and encoded using valid characters.
+    ///
+    /// [Spotify ID]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
+    pub fn from_base16(src: &str) -> Result<SpotifyId, SpotifyIdError> {
+        let mut dst: u128 = 0;
 
-        let mut n = 0u128;
-        for c in data {
-            let d = match BASE16_DIGITS.iter().position(|e| e == c) {
-                None => return Err(SpotifyIdError),
-                Some(x) => x as u128,
-            };
-            n = n * 16;
-            n = n + d;
+        for c in src.as_bytes() {
+            let p = match c {
+                b'0'..=b'9' => c - b'0',
+                b'a'..=b'f' => c - b'a' + 10,
+                _ => return Err(SpotifyIdError),
+            } as u128;
+
+            dst <<= 4;
+            dst += p;
         }
 
-        Ok(SpotifyId::as_track(n))
+        Ok(SpotifyId::as_track(dst))
     }
 
-    pub fn from_base62(id: &str) -> Result<SpotifyId, SpotifyIdError> {
-        let data = id.as_bytes();
+    /// Parses a base62 encoded [Spotify ID] into a `SpotifyId`.
+    ///
+    /// `src` is expected to be 22 bytes long and encoded using valid characters.
+    ///
+    /// [Spotify ID]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
+    pub fn from_base62(src: &str) -> Result<SpotifyId, SpotifyIdError> {
+        let mut dst: u128 = 0;
 
-        let mut n = 0u128;
-        for c in data {
-            let d = match BASE62_DIGITS.iter().position(|e| e == c) {
-                None => return Err(SpotifyIdError),
-                Some(x) => x as u128,
-            };
-            n = n * 62;
-            n = n + d;
+        for c in src.as_bytes() {
+            let p = match c {
+                b'0'..=b'9' => c - b'0',
+                b'a'..=b'z' => c - b'a' + 10,
+                b'A'..=b'Z' => c - b'A' + 36,
+                _ => return Err(SpotifyIdError),
+            } as u128;
+
+            dst *= 62;
+            dst += p;
         }
-        Ok(SpotifyId::as_track(n))
+
+        Ok(SpotifyId::as_track(dst))
     }
 
-    pub fn from_raw(data: &[u8]) -> Result<SpotifyId, SpotifyIdError> {
-        if data.len() != 16 {
+    /// Creates a `SpotifyId` from a copy of `SpotifyId::SIZE` (16) bytes in big-endian order.
+    ///
+    /// The resulting `SpotifyId` will default to a `SpotifyAudioType::Track`.
+    pub fn from_raw(src: &[u8]) -> Result<SpotifyId, SpotifyIdError> {
+        if src.len() != SpotifyId::SIZE {
             return Err(SpotifyIdError);
         };
 
-        let mut arr: [u8; 16] = Default::default();
-        arr.copy_from_slice(&data[0..16]);
+        let mut dst = [0u8; SpotifyId::SIZE];
+        dst.copy_from_slice(src);
 
-        Ok(SpotifyId::as_track(u128::from_be_bytes(arr)))
+        Ok(SpotifyId::as_track(u128::from_be_bytes(dst)))
     }
 
-    pub fn from_uri(uri: &str) -> Result<SpotifyId, SpotifyIdError> {
-        let parts = uri.split(":").collect::<Vec<&str>>();
-        let gid = parts.last().unwrap();
-        if uri.contains(":episode:") {
-            let mut spotify_id = SpotifyId::from_base62(gid).unwrap();
-            let _ = std::mem::replace(&mut spotify_id.audio_type, SpotifyAudioType::Podcast);
-            Ok(spotify_id)
-        } else if uri.contains(":track:") {
-            SpotifyId::from_base62(gid)
-        } else {
-            // show/playlist/artist/album/??
-            let mut spotify_id = SpotifyId::from_base62(gid).unwrap();
-            let _ = std::mem::replace(&mut spotify_id.audio_type, SpotifyAudioType::NonPlayable);
-            Ok(spotify_id)
+    /// Parses a [Spotify URI] into a `SpotifyId`.
+    ///
+    /// `uri` is expected to be in the canonical form `spotify:{type}:{id}`, where `{type}`
+    /// can be arbitrary while `{id}` is a 22-character long, base62 encoded Spotify ID.
+    ///
+    /// [Spotify URI]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
+    pub fn from_uri(src: &str) -> Result<SpotifyId, SpotifyIdError> {
+        // We expect the ID to be the last colon-delimited item in the URI.
+        let b = src.as_bytes();
+        let id_i = b.len() - SpotifyId::SIZE_BASE62;
+        if b[id_i - 1] != b':' {
+            return Err(SpotifyIdError);
         }
+
+        let mut id = match SpotifyId::from_base62(&src[id_i..]) {
+            Ok(v) => v,
+            Err(e) => return Err(e),
+        };
+
+        // Slice offset by 8 as we are skipping the "spotify:" prefix.
+        id.audio_type = src[8..id_i - 1].into();
+
+        Ok(id)
     }
 
+    /// Returns the `SpotifyId` as a base16 (hex) encoded, `SpotifyId::SIZE_BASE16` (32)
+    /// character long `String`.
     pub fn to_base16(&self) -> String {
-        format!("{:032x}", self.id)
+        to_base16(&self.to_raw(), &mut [0u8; SpotifyId::SIZE_BASE16])
     }
 
+    /// Returns the `SpotifyId` as a [canonically] base62 encoded, `SpotifyId::SIZE_BASE62` (22)
+    /// character long `String`.
+    ///
+    /// [canonically]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
     pub fn to_base62(&self) -> String {
-        let &SpotifyId { id: mut n, .. } = self;
+        let mut dst = [0u8; 22];
+        let mut i = 0;
+        let n = self.id;
 
-        let mut data = [0u8; 22];
-        for i in 0..22 {
-            data[21 - i] = BASE62_DIGITS[(n % 62) as usize];
-            n /= 62;
+        // The algorithm is based on:
+        // https://github.com/trezor/trezor-crypto/blob/c316e775a2152db255ace96b6b65ac0f20525ec0/base58.c
+        //
+        // We are not using naive division of self.id as it is a u128 and div + mod are software
+        // emulated at runtime (and not optimized into mul + shift) on non-128bit platforms,
+        // making them very expensive.
+        //
+        // Trezor's algorithm allows us to stick to arithmetic on native registers making this
+        // an order of magnitude faster. Additionally, as our sizes are known, instead of
+        // dealing with the ID on a byte by byte basis, we decompose it into four u32s and
+        // use 64-bit arithmetic on them for an additional speedup.
+        for shift in &[96, 64, 32, 0] {
+            let mut carry = (n >> shift) as u32 as u64;
+
+            for b in &mut dst[..i] {
+                carry += (*b as u64) << 32;
+                *b = (carry % 62) as u8;
+                carry /= 62;
+            }
+
+            while carry > 0 {
+                dst[i] = (carry % 62) as u8;
+                carry /= 62;
+                i += 1;
+            }
         }
 
-        std::str::from_utf8(&data).unwrap().to_owned()
-    }
+        for b in &mut dst {
+            *b = BASE62_DIGITS[*b as usize];
+        }
 
-    pub fn to_uri(&self) -> String {
-        match self.audio_type {
-            SpotifyAudioType::Track => format!("spotify:track:{}", self.to_base62()),
-            SpotifyAudioType::Podcast => format!("spotify:episode:{}", self.to_base62()),
-            SpotifyAudioType::NonPlayable => format!("spotify:unknown:{}", self.to_base62()),
+        dst.reverse();
+
+        unsafe {
+            // Safety: We are only dealing with ASCII characters.
+            String::from_utf8_unchecked(dst.to_vec())
         }
     }
 
-    pub fn to_raw(&self) -> [u8; 16] {
+    /// Returns a copy of the `SpotifyId` as an array of `SpotifyId::SIZE` (16) bytes in
+    /// big-endian order.
+    pub fn to_raw(&self) -> [u8; SpotifyId::SIZE] {
         self.id.to_be_bytes()
     }
+
+    /// Returns the `SpotifyId` as a [Spotify URI] in the canonical form `spotify:{type}:{id}`,
+    /// where `{type}` is an arbitrary string and `{id}` is a 22-character long, base62 encoded
+    /// Spotify ID.
+    ///
+    /// If the `SpotifyId` has an associated type unrecognized by the library, `{type}` will
+    /// be encoded as `unknown`.
+    ///
+    /// [Spotify URI]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
+    pub fn to_uri(&self) -> String {
+        // 8 chars for the "spotify:" prefix + 1 colon + 22 chars base62 encoded ID  = 31
+        // + unknown size audio_type.
+        let mut dst = String::with_capacity(31 + self.audio_type.len());
+        dst.push_str("spotify:");
+        dst.push_str(self.audio_type.into());
+        dst.push_str(":");
+        dst.push_str(&self.to_base62());
+
+        dst
+    }
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -122,11 +240,7 @@ pub struct FileId(pub [u8; 20]);
 
 impl FileId {
     pub fn to_base16(&self) -> String {
-        self.0
-            .iter()
-            .map(|b| format!("{:02x}", b))
-            .collect::<Vec<String>>()
-            .concat()
+        to_base16(&self.0, &mut [0u8; 40])
     }
 }
 
@@ -141,3 +255,185 @@ impl fmt::Display for FileId {
         f.write_str(&self.to_base16())
     }
 }
+
+#[inline]
+fn to_base16(src: &[u8], buf: &mut [u8]) -> String {
+    let mut i = 0;
+    for v in src {
+        buf[i] = BASE16_DIGITS[(v >> 4) as usize];
+        buf[i + 1] = BASE16_DIGITS[(v & 0x0f) as usize];
+        i += 2;
+    }
+
+    unsafe {
+        // Safety: We are only dealing with ASCII characters.
+        String::from_utf8_unchecked(buf.to_vec())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    struct ConversionCase {
+        id: u128,
+        kind: SpotifyAudioType,
+        uri: &'static str,
+        base16: &'static str,
+        base62: &'static str,
+        raw: &'static [u8],
+    }
+
+    static CONV_VALID: [ConversionCase; 4] = [
+        ConversionCase {
+            id: 238762092608182713602505436543891614649,
+            kind: SpotifyAudioType::Track,
+            uri: "spotify:track:5sWHDYs0csV6RS48xBl0tH",
+            base16: "b39fe8081e1f4c54be38e8d6f9f12bb9",
+            base62: "5sWHDYs0csV6RS48xBl0tH",
+            raw: &[
+                179, 159, 232, 8, 30, 31, 76, 84, 190, 56, 232, 214, 249, 241, 43, 185,
+            ],
+        },
+        ConversionCase {
+            id: 204841891221366092811751085145916697048,
+            kind: SpotifyAudioType::Track,
+            uri: "spotify:track:4GNcXTGWmnZ3ySrqvol3o4",
+            base16: "9a1b1cfbc6f244569ae0356c77bbe9d8",
+            base62: "4GNcXTGWmnZ3ySrqvol3o4",
+            raw: &[
+                154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 53, 108, 119, 187, 233, 216,
+            ],
+        },
+        ConversionCase {
+            id: 204841891221366092811751085145916697048,
+            kind: SpotifyAudioType::Podcast,
+            uri: "spotify:episode:4GNcXTGWmnZ3ySrqvol3o4",
+            base16: "9a1b1cfbc6f244569ae0356c77bbe9d8",
+            base62: "4GNcXTGWmnZ3ySrqvol3o4",
+            raw: &[
+                154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 53, 108, 119, 187, 233, 216,
+            ],
+        },
+        ConversionCase {
+            id: 204841891221366092811751085145916697048,
+            kind: SpotifyAudioType::NonPlayable,
+            uri: "spotify:unknown:4GNcXTGWmnZ3ySrqvol3o4",
+            base16: "9a1b1cfbc6f244569ae0356c77bbe9d8",
+            base62: "4GNcXTGWmnZ3ySrqvol3o4",
+            raw: &[
+                154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 53, 108, 119, 187, 233, 216,
+            ],
+        },
+    ];
+
+    static CONV_INVALID: [ConversionCase; 2] = [
+        ConversionCase {
+            id: 0,
+            kind: SpotifyAudioType::NonPlayable,
+            // Invalid ID in the URI.
+            uri: "spotify:arbitrarywhatever:5sWHDYs0Bl0tH",
+            base16: "ZZZZZ8081e1f4c54be38e8d6f9f12bb9",
+            base62: "!!!!!Ys0csV6RS48xBl0tH",
+            raw: &[
+                // Invalid length.
+                154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 5, 3, 108, 119, 187, 233, 216, 255,
+            ],
+        },
+        ConversionCase {
+            id: 0,
+            kind: SpotifyAudioType::NonPlayable,
+            // Missing colon between ID and type.
+            uri: "spotify:arbitrarywhatever5sWHDYs0csV6RS48xBl0tH",
+            base16: "--------------------",
+            base62: "....................",
+            raw: &[
+                // Invalid length.
+                154, 27, 28, 251,
+            ],
+        },
+    ];
+
+    #[test]
+    fn from_base62() {
+        for c in &CONV_VALID {
+            assert_eq!(SpotifyId::from_base62(c.base62).unwrap().id, c.id);
+        }
+
+        for c in &CONV_INVALID {
+            assert_eq!(SpotifyId::from_base62(c.base62), Err(SpotifyIdError));
+        }
+    }
+
+    #[test]
+    fn to_base62() {
+        for c in &CONV_VALID {
+            let id = SpotifyId {
+                id: c.id,
+                audio_type: c.kind,
+            };
+
+            assert_eq!(id.to_base62(), c.base62);
+        }
+    }
+
+    #[test]
+    fn from_base16() {
+        for c in &CONV_VALID {
+            assert_eq!(SpotifyId::from_base16(c.base16).unwrap().id, c.id);
+        }
+
+        for c in &CONV_INVALID {
+            assert_eq!(SpotifyId::from_base16(c.base16), Err(SpotifyIdError));
+        }
+    }
+
+    #[test]
+    fn to_base16() {
+        for c in &CONV_VALID {
+            let id = SpotifyId {
+                id: c.id,
+                audio_type: c.kind,
+            };
+
+            assert_eq!(id.to_base16(), c.base16);
+        }
+    }
+
+    #[test]
+    fn from_uri() {
+        for c in &CONV_VALID {
+            let actual = SpotifyId::from_uri(c.uri).unwrap();
+
+            assert_eq!(actual.id, c.id);
+            assert_eq!(actual.audio_type, c.kind);
+        }
+
+        for c in &CONV_INVALID {
+            assert_eq!(SpotifyId::from_uri(c.uri), Err(SpotifyIdError));
+        }
+    }
+
+    #[test]
+    fn to_uri() {
+        for c in &CONV_VALID {
+            let id = SpotifyId {
+                id: c.id,
+                audio_type: c.kind,
+            };
+
+            assert_eq!(id.to_uri(), c.uri);
+        }
+    }
+
+    #[test]
+    fn from_raw() {
+        for c in &CONV_VALID {
+            assert_eq!(SpotifyId::from_raw(c.raw).unwrap().id, c.id);
+        }
+
+        for c in &CONV_INVALID {
+            assert_eq!(SpotifyId::from_raw(c.raw), Err(SpotifyIdError));
+        }
+    }
+}