From 818e83d260ef6082cf61da1aac746c3948f17a6b Mon Sep 17 00:00:00 2001 From: Michael Bradley Date: Thu, 30 Jan 2025 20:19:08 -0500 Subject: [PATCH 1/8] Extract Sift{Error, Result} from pure BinaryHeap --- src/backing/containers/mod.rs | 2 ++ src/backing/containers/sift_result.rs | 26 ++++++++++++++++++++++++++ src/backing/pure/binary_heap.rs | 27 +-------------------------- 3 files changed, 29 insertions(+), 26 deletions(-) create mode 100644 src/backing/containers/sift_result.rs diff --git a/src/backing/containers/mod.rs b/src/backing/containers/mod.rs index 74cfd32..992fa05 100644 --- a/src/backing/containers/mod.rs +++ b/src/backing/containers/mod.rs @@ -1,5 +1,7 @@ mod pair; mod py_item; +mod sift_result; pub use pair::Pair; pub use py_item::PyItem; +pub use sift_result::{SiftError, SiftResult}; diff --git a/src/backing/containers/sift_result.rs b/src/backing/containers/sift_result.rs new file mode 100644 index 0000000..8b1e323 --- /dev/null +++ b/src/backing/containers/sift_result.rs @@ -0,0 +1,26 @@ +use std::fmt::{Display, Formatter, Result as fmtResult}; + +/// Indicates why a sift failed +#[derive(Debug, Clone)] +pub struct SiftError { + /// The index that couldn't be sifted + index: usize, + /// The length of the array + len: usize, +} + +impl SiftError { + /// Instantiates a `SiftError` + pub fn new(index: usize, len: usize) -> Self { + Self { index, len } + } +} + +impl Display for SiftError { + fn fmt(&self, f: &mut Formatter) -> fmtResult { + write!(f, "Could not sift index {} of {}", self.index, self.len) + } +} + +/// Whether a sift operation succeeded +pub type SiftResult = Result<(), SiftError>; diff --git a/src/backing/pure/binary_heap.rs b/src/backing/pure/binary_heap.rs index cd84f18..2b43ec0 100644 --- a/src/backing/pure/binary_heap.rs +++ b/src/backing/pure/binary_heap.rs @@ -1,32 +1,7 @@ -use std::fmt; +use crate::backing::containers::{SiftError, SiftResult}; use super::PureBacking; -/// Indicates why a sift failed -#[derive(Debug, Clone)] -struct SiftError { - /// The index that couldn't be sifted - index: usize, - /// The length of the array - len: usize, -} - -impl SiftError { - /// Instantiates a `SiftError` - fn new(index: usize, len: usize) -> Self { - Self { index, len } - } -} - -impl fmt::Display for SiftError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Could not sift index {} of {}", self.index, self.len) - } -} - -/// Whether a sift operation succeeded -type SiftResult = Result<(), SiftError>; - /// A binary min-heap backed by an array #[derive(Debug)] pub struct BinaryHeap { From d8fef9b806bd3cb723f1fe7ecf825a647b84703f Mon Sep 17 00:00:00 2001 From: Michael Bradley Date: Thu, 30 Jan 2025 20:22:43 -0500 Subject: [PATCH 2/8] Update README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 494f4b8..7e602aa 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ # pyority_queue Implementations of priority queues in Python using Rust bindings for speed. + +## Purpose + +I'm doing this to learn Rust, so don't judge me too hard lol. +I've noted a few places where I'm unhappy with the implementation (mostly w.r.t. code duplication), but hopefully as I become more comfortable in Rust I can go back and fix those. +Better to have a bad implementation now than a good implementation never is my view. From d56d2cf117f58df0d6847a386662f45c8be91d5c Mon Sep 17 00:00:00 2001 From: Michael Bradley Date: Fri, 31 Jan 2025 01:01:10 -0500 Subject: [PATCH 3/8] Add more container functions to support future indexed binary heap --- src/backing/containers/pair.rs | 18 ++++++++++++++++-- src/backing/containers/py_item.rs | 2 ++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/backing/containers/pair.rs b/src/backing/containers/pair.rs index d194835..190cd7b 100644 --- a/src/backing/containers/pair.rs +++ b/src/backing/containers/pair.rs @@ -13,11 +13,25 @@ impl Pair { Self { data, priority } } - /// Retrieves the internal data. - /// It would be nicer to implement this using [`From`] or [`Into`], but I don't see a way to do that using generics. + /// Returns the internal data. + /// It might(?) be nicer to implement this using [`From`] or [`Into`], but I don't see a way to do that using generics. pub fn data(self) -> D { self.data } + + pub fn get_data(&self) -> &D { + &self.data + } + + /// Retrieve the priority associated with the data + pub fn get_priority(&self) -> &P { + &self.priority + } + + /// Update the priority associated with the data + pub fn set_priority(&mut self, priority: P) { + self.priority = priority + } } impl PartialOrd for Pair { diff --git a/src/backing/containers/py_item.rs b/src/backing/containers/py_item.rs index 363c853..f67c853 100644 --- a/src/backing/containers/py_item.rs +++ b/src/backing/containers/py_item.rs @@ -48,6 +48,8 @@ impl PartialEq for PyItem { } } +impl Eq for PyItem {} + impl Hash for PyItem { fn hash(&self, state: &mut H) { // TODO: Should warn or fail instead of defaulting to 0 From 846a5424deed9185178bb9975ff64c66225ff827 Mon Sep 17 00:00:00 2001 From: Michael Bradley Date: Fri, 31 Jan 2025 01:02:16 -0500 Subject: [PATCH 4/8] Implement IndexedBinaryHeap API Still missing core sift functions, but passes 3/27 tests now --- src/backing/indexed/binary_heap.rs | 90 ++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 11 deletions(-) diff --git a/src/backing/indexed/binary_heap.rs b/src/backing/indexed/binary_heap.rs index 84415ec..d438dce 100644 --- a/src/backing/indexed/binary_heap.rs +++ b/src/backing/indexed/binary_heap.rs @@ -1,49 +1,117 @@ use std::{collections::HashMap, hash::Hash}; -use crate::backing::{containers::Pair, indexed::IndexedBacking}; +use crate::backing::{ + containers::{Pair, SiftResult}, + indexed::IndexedBacking, +}; -pub struct IndexedBinaryHeap { +pub struct IndexedBinaryHeap< + D: Hash + Eq + Clone + Send + Sync, + P: PartialOrd + Clone + Send + Sync, +> { data: Vec>, indices: HashMap, } -impl IndexedBinaryHeap { +impl + IndexedBinaryHeap +{ pub fn new() -> Self { Self { data: Vec::new(), indices: HashMap::new(), } } + + fn sift_up(&mut self, i: usize) -> SiftResult { + todo!() + } + + fn sift_down(&mut self, i: usize) -> SiftResult { + todo!() + } + + fn delete_pair(&mut self, i: usize) -> Option> { + if i >= self.data.len() { + return None; + } + let pair = self.data[i].clone(); + if i < self.data.len() - 1 { + self.data[i] = self.data.pop().unwrap(); + if self.data[i] < pair { + self.sift_up(i).unwrap(); + } else { + self.sift_down(i).unwrap(); + } + } + self.indices.remove(pair.get_data()); + Some(pair) + } } -impl FromIterator<(D, P)> +impl FromIterator<(D, P)> for IndexedBinaryHeap { fn from_iter>(iter: T) -> Self { - todo!() + let mut this = Self::new(); + for (i, (data, priority)) in iter.into_iter().enumerate() { + this.indices.insert(data.clone(), i); + this.data.push(Pair::new(data, priority)); + } + for i in (0..=(this.data.len() / 2)).rev() { + this.sift_down(i).expect("Index error during heapify"); + } + this } } -impl IndexedBacking +impl IndexedBacking for IndexedBinaryHeap { fn len(&self) -> usize { - todo!() + self.data.len() } fn contains(&self, data: &D) -> bool { - todo!() + self.indices.contains_key(data) } fn set(&mut self, data: D, priority: P) { - todo!() + if let Some(index) = self.indices.get(&data) { + let pair = self.data.get_mut(*index).unwrap(); + let old_priority = pair.get_priority(); + if priority < *old_priority { + pair.set_priority(priority); + self.sift_up(*index).unwrap(); + } else { + pair.set_priority(priority); + self.sift_down(*index).unwrap(); + } + } else { + let final_index = self.data.len(); + self.indices.insert(data.clone(), final_index); + self.data.push(Pair::new(data, priority)); + self.sift_up(final_index).unwrap(); + } } fn remove(&mut self, data: D) -> Option

{ - todo!() + if let Some(index) = self.indices.get(&data) { + if let Some(pair) = self.delete_pair(*index) { + Some(pair.get_priority().clone()) + } else { + None + } + } else { + None + } } fn pop(&mut self) -> Option { - todo!() + if let Some(pair) = self.delete_pair(0) { + Some(pair.data()) + } else { + None + } } } From 02e3335204a8b4a740b012a7c50678466a837dde Mon Sep 17 00:00:00 2001 From: Michael Bradley Date: Fri, 31 Jan 2025 01:38:37 -0500 Subject: [PATCH 5/8] Initial implementation of IndexedBinaryHeap sift functions --- src/backing/indexed/binary_heap.rs | 73 ++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 3 deletions(-) diff --git a/src/backing/indexed/binary_heap.rs b/src/backing/indexed/binary_heap.rs index d438dce..cea10c4 100644 --- a/src/backing/indexed/binary_heap.rs +++ b/src/backing/indexed/binary_heap.rs @@ -1,7 +1,7 @@ use std::{collections::HashMap, hash::Hash}; use crate::backing::{ - containers::{Pair, SiftResult}, + containers::{Pair, SiftError, SiftResult}, indexed::IndexedBacking, }; @@ -24,11 +24,78 @@ impl } fn sift_up(&mut self, i: usize) -> SiftResult { - todo!() + print!("{i}"); + if i == 0 { + // Base case, at root so nothing to do + Ok(()) + } else if let Some(child) = self.data.get(i).cloned() { + let parent_index = (i - 1) / 2; + // Check if the heap property is violated + if child < self.data[parent_index] { + (self.data[i], self.data[parent_index]) = + (self.data[parent_index].clone(), self.data[i].clone()); + self.indices.insert(self.data[i].clone().data(), i).unwrap(); + self.indices + .insert(self.data[parent_index].clone().data(), parent_index) + .unwrap(); + self.sift_up(parent_index) + } else { + // Sift complete + Ok(()) + } + } else { + // Tried to sift a non-existent index + Err(SiftError::new(i, self.data.len())) + } } fn sift_down(&mut self, i: usize) -> SiftResult { - todo!() + print!("{i}"); + if i > self.data.len() { + // Tried to sift a non-existent index + Err(SiftError::new(i, self.data.len())) + } else { + if let Some(first_child) = self.data.get(i * 2 + 1).cloned() { + let smaller_child_index; + let smaller_child; + + // Find the smallest child and its index + if let Some(second_child) = self.data.get(i * 2 + 2).cloned() { + // Both children, use the smaller one + if first_child < second_child { + smaller_child = first_child; + smaller_child_index = i * 2 + 1; + } else { + smaller_child = second_child; + smaller_child_index = i * 2 + 2; + } + } else { + // Only one child, no choice + smaller_child = first_child; + smaller_child_index = i * 2 + 1; + } + + if smaller_child < self.data[i] { + // Swap parent with child + self.data[smaller_child_index] = self.data[i].clone(); + self.indices.insert( + self.data[smaller_child_index].clone().data(), + smaller_child_index, + ); + self.data[i] = smaller_child.clone(); + self.indices.insert(smaller_child.data(), i); + + // Repeat process with child + self.sift_down(smaller_child_index) + } else { + // Heap property satisfied, we're done + Ok(()) + } + } else { + // Base case, no children so nothing to do + Ok(()) + } + } } fn delete_pair(&mut self, i: usize) -> Option> { From 5fbb9de2dc798e70339fff4dcffbf222dddf9e27 Mon Sep 17 00:00:00 2001 From: Michael Bradley Date: Fri, 31 Jan 2025 01:54:33 -0500 Subject: [PATCH 6/8] Fix IBH not deleting last pair --- src/backing/indexed/binary_heap.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backing/indexed/binary_heap.rs b/src/backing/indexed/binary_heap.rs index cea10c4..9c69025 100644 --- a/src/backing/indexed/binary_heap.rs +++ b/src/backing/indexed/binary_heap.rs @@ -110,6 +110,8 @@ impl } else { self.sift_down(i).unwrap(); } + } else { + self.data.pop().unwrap(); } self.indices.remove(pair.get_data()); Some(pair) From ed1bd67b86cc15aeef3a99a4868807d314976f06 Mon Sep 17 00:00:00 2001 From: Michael Bradley Date: Fri, 31 Jan 2025 01:59:03 -0500 Subject: [PATCH 7/8] Properly handle duplicates in IBH from_iter --- src/backing/indexed/binary_heap.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/backing/indexed/binary_heap.rs b/src/backing/indexed/binary_heap.rs index 9c69025..05c73e6 100644 --- a/src/backing/indexed/binary_heap.rs +++ b/src/backing/indexed/binary_heap.rs @@ -124,8 +124,12 @@ impl Fr fn from_iter>(iter: T) -> Self { let mut this = Self::new(); for (i, (data, priority)) in iter.into_iter().enumerate() { - this.indices.insert(data.clone(), i); - this.data.push(Pair::new(data, priority)); + if let Some(prev) = this.indices.insert(data.clone(), i) { + this.indices.insert(data.clone(), prev).unwrap(); + this.data[prev] = Pair::new(data, priority); + } else { + this.data.push(Pair::new(data, priority)); + } } for i in (0..=(this.data.len() / 2)).rev() { this.sift_down(i).expect("Index error during heapify"); From 70295da0ba0a5b6ab347d3fef7d3dfa956510694 Mon Sep 17 00:00:00 2001 From: Michael Bradley Date: Fri, 31 Jan 2025 01:59:43 -0500 Subject: [PATCH 8/8] Check more return values are as expected By "Check" I mean I'm just unwrapping ones that should exist because I'm lazy --- src/backing/indexed/binary_heap.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/backing/indexed/binary_heap.rs b/src/backing/indexed/binary_heap.rs index 05c73e6..2f5dbc6 100644 --- a/src/backing/indexed/binary_heap.rs +++ b/src/backing/indexed/binary_heap.rs @@ -78,12 +78,14 @@ impl if smaller_child < self.data[i] { // Swap parent with child self.data[smaller_child_index] = self.data[i].clone(); - self.indices.insert( - self.data[smaller_child_index].clone().data(), - smaller_child_index, - ); + self.indices + .insert( + self.data[smaller_child_index].clone().data(), + smaller_child_index, + ) + .unwrap(); self.data[i] = smaller_child.clone(); - self.indices.insert(smaller_child.data(), i); + self.indices.insert(smaller_child.data(), i).unwrap(); // Repeat process with child self.sift_down(smaller_child_index) @@ -113,7 +115,7 @@ impl } else { self.data.pop().unwrap(); } - self.indices.remove(pair.get_data()); + self.indices.remove(pair.get_data()).unwrap(); Some(pair) } } @@ -162,7 +164,9 @@ impl In } } else { let final_index = self.data.len(); - self.indices.insert(data.clone(), final_index); + if let Some(_) = self.indices.insert(data.clone(), final_index) { + panic!("Item was not consistently hashed") + } self.data.push(Pair::new(data, priority)); self.sift_up(final_index).unwrap(); }