-
Notifications
You must be signed in to change notification settings - Fork 315
feat: Add image hashing functions with support for 5 algorithms #5229
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
731e6de
08e0001
e06a8e7
bf3c16a
cd5a5de
a17fbd7
e62c624
96e7443
1465d6b
41277ff
f00b77e
18bf358
0bc37da
3f993b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ | |
|
||
from daft.daft import ImageFormat, ImageMode | ||
from daft.datatype import DataType | ||
from daft.expressions import Expression | ||
from daft.expressions import Expression, lit | ||
|
||
|
||
def resize(expr: Expression, w: int, h: int) -> Expression: | ||
|
@@ -87,3 +87,24 @@ def convert_image(expr: Expression, mode: str | ImageMode) -> Expression: | |
if not isinstance(mode, ImageMode): | ||
raise ValueError(f"mode must be a string or ImageMode variant, but got: {mode}") | ||
return Expression._call_builtin_scalar_fn("to_mode", expr, mode=mode) | ||
|
||
|
||
def image_hash(
    expr: Expression,
    algorithm: Literal["average", "perceptual", "difference", "wavelet", "crop_resistant"] = "average",
) -> Expression:
    """Computes the hash of an image using the specified algorithm.

    Args:
        expr: Expression to compute hash for.
        algorithm: The hashing algorithm to use. The name is matched without regard
            to letter case. Options are:

            - "average": Average hash (default). Conventionally the cheapest option:
              each pixel of a downscaled grayscale image is compared with the mean
              brightness.
            - "perceptual": Perceptual hash. Conventionally derived from the
              low-frequency terms of a discrete cosine transform; costlier than the
              average hash but more tolerant of brightness and contrast changes.
            - "difference": Difference hash. Conventionally compares neighbouring
              pixels, so it follows gradients rather than absolute brightness.
            - "wavelet": Wavelet hash. Conventionally built from a wavelet
              decomposition of the image instead of a cosine transform.
            - "crop_resistant": Crop-resistant hash. Conventionally hashes segments
              of the image separately so that a cropped copy can still match.

    Returns:
        Expression: A Utf8 expression representing the hash of the image.

    Note:
        An unrecognized algorithm name is rejected with an
        "Invalid image hash algorithm" error when the expression is evaluated, and
        a non-image input is rejected with an
        "Image hash can only be applied to ImageArrays" type error.
    """
    # NOTE(review): the per-algorithm descriptions above follow the conventional
    # definitions of these hash families; the hashing code itself is not visible
    # from this module, so confirm them against the Rust implementation.
    return Expression._call_builtin_scalar_fn("image_hash", expr, algorithm=lit(algorithm))
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,105 @@ | ||||||||||||||||||||||||
use common_error::{DaftError, DaftResult}; | ||||||||||||||||||||||||
use daft_core::{ | ||||||||||||||||||||||||
lit::{FromLiteral, Literal}, | ||||||||||||||||||||||||
prelude::*, | ||||||||||||||||||||||||
}; | ||||||||||||||||||||||||
use daft_dsl::{ | ||||||||||||||||||||||||
ExprRef, | ||||||||||||||||||||||||
functions::{FunctionArgs, ScalarUDF}, | ||||||||||||||||||||||||
}; | ||||||||||||||||||||||||
use serde::{Deserialize, Serialize}; | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] | ||||||||||||||||||||||||
pub struct ImageHash; | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] | ||||||||||||||||||||||||
pub enum ImageHashAlgorithm { | ||||||||||||||||||||||||
Average, | ||||||||||||||||||||||||
Perceptual, | ||||||||||||||||||||||||
Difference, | ||||||||||||||||||||||||
Wavelet, | ||||||||||||||||||||||||
CropResistant, | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
impl std::str::FromStr for ImageHashAlgorithm { | ||||||||||||||||||||||||
type Err = DaftError; | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||||||||||||||||||||||
match s.to_lowercase().as_str() { | ||||||||||||||||||||||||
"average" => Ok(Self::Average), | ||||||||||||||||||||||||
"perceptual" => Ok(Self::Perceptual), | ||||||||||||||||||||||||
"difference" => Ok(Self::Difference), | ||||||||||||||||||||||||
"wavelet" => Ok(Self::Wavelet), | ||||||||||||||||||||||||
"crop_resistant" => Ok(Self::CropResistant), | ||||||||||||||||||||||||
_ => Err(DaftError::ValueError(format!( | ||||||||||||||||||||||||
"Invalid image hash algorithm: {}. Must be one of: average, perceptual, difference, wavelet, crop_resistant", | ||||||||||||||||||||||||
s | ||||||||||||||||||||||||
))), | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
impl FromLiteral for ImageHashAlgorithm { | ||||||||||||||||||||||||
fn try_from_literal(lit: &Literal) -> DaftResult<Self> { | ||||||||||||||||||||||||
match lit { | ||||||||||||||||||||||||
Literal::Utf8(s) => s.parse(), | ||||||||||||||||||||||||
_ => Err(DaftError::TypeError(format!( | ||||||||||||||||||||||||
"Expected string literal for image hash algorithm, got: {:?}", | ||||||||||||||||||||||||
lit | ||||||||||||||||||||||||
))), | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
#[typetag::serde] | ||||||||||||||||||||||||
impl ScalarUDF for ImageHash { | ||||||||||||||||||||||||
fn call(&self, inputs: FunctionArgs<Series>) -> DaftResult<Series> { | ||||||||||||||||||||||||
let input = inputs.required((0, "input"))?; | ||||||||||||||||||||||||
let algorithm_series = inputs.required((1, "algorithm"))?; | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
// Extract the algorithm from the series (should be a scalar) | ||||||||||||||||||||||||
let algorithm_str = algorithm_series.utf8()?.get(0).ok_or_else(|| { | ||||||||||||||||||||||||
DaftError::ValueError("algorithm must be a scalar string".to_string()) | ||||||||||||||||||||||||
})?; | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
let algorithm: ImageHashAlgorithm = algorithm_str.parse()?; | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
// Convert enum to string and call the unified image_hash function | ||||||||||||||||||||||||
let algorithm_str = match algorithm { | ||||||||||||||||||||||||
ImageHashAlgorithm::Average => "average", | ||||||||||||||||||||||||
ImageHashAlgorithm::Perceptual => "perceptual", | ||||||||||||||||||||||||
ImageHashAlgorithm::Difference => "difference", | ||||||||||||||||||||||||
ImageHashAlgorithm::Wavelet => "wavelet", | ||||||||||||||||||||||||
ImageHashAlgorithm::CropResistant => "crop_resistant", | ||||||||||||||||||||||||
}; | ||||||||||||||||||||||||
Comment on lines
+68
to
+74
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. style: redundant enum-to-string conversion after parsing string-to-enum on line 65
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or alternatively do the validation in Python and pass the enum itself into Rust? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True that's a good suggestion too. Here's a similar pattern that uses our FromLiteral trait which does a lot of this work for you. Define the type, implement FromLiteral, then define an Args type with derive(FunctionArgs). /// Supported codecs for the decode and encode functions.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Codec {
Base64,
Deflate,
Gzip,
Utf8,
Zlib,
}
impl FromLiteral for Codec {
fn try_from_literal(lit: &Literal) -> DaftResult<Self> {
if let Literal::Utf8(s) = lit {
s.parse()
} else {
Err(DaftError::ValueError(format!(
"Expected a string literal, got {:?}",
lit
)))
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, FunctionArgs)]
struct Args<T> {
input: T,
codec: Codec,
}
// usage
fn call(&self, inputs: daft_dsl::functions::FunctionArgs<Series>) -> DaftResult<Series> {
let Args { input, codec } = inputs.try_into()?;
} |
||||||||||||||||||||||||
|
||||||||||||||||||||||||
crate::series::image_hash(input, algorithm_str) | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
fn name(&self) -> &'static str { | ||||||||||||||||||||||||
"image_hash" | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
fn get_return_field( | ||||||||||||||||||||||||
&self, | ||||||||||||||||||||||||
inputs: FunctionArgs<ExprRef>, | ||||||||||||||||||||||||
schema: &Schema, | ||||||||||||||||||||||||
) -> DaftResult<Field> { | ||||||||||||||||||||||||
let input = inputs.required((0, "input"))?; | ||||||||||||||||||||||||
let field = input.to_field(schema)?; | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
match field.dtype { | ||||||||||||||||||||||||
DataType::Image(_) | DataType::FixedShapeImage(..) => { | ||||||||||||||||||||||||
Ok(Field::new(field.name, DataType::Utf8)) | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
_ => Err(DaftError::TypeError(format!( | ||||||||||||||||||||||||
"Image hash can only be applied to ImageArrays, got {}", | ||||||||||||||||||||||||
field.dtype | ||||||||||||||||||||||||
))), | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
fn docstring(&self) -> &'static str { | ||||||||||||||||||||||||
"Computes the hash of an image using the specified algorithm. Supports average, perceptual, difference, wavelet, and crop_resistant algorithms." | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please run the pre-commit styles on your PR for consistency?