diff --git a/Cargo.toml b/Cargo.toml index 7278a4a..333c00a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,3 +25,6 @@ default = ["deflate", "zstd"] deflate = ["include-flate-compress/deflate"] zstd = ["include-flate-compress/zstd"] no-compression-warnings = ["include-flate-codegen/no-compression-warnings"] + +[[example]] +name = "flate" diff --git a/assets/hello-world.txt b/assets/hello-world.txt new file mode 100644 index 0000000..15c0ce8 --- /dev/null +++ b/assets/hello-world.txt @@ -0,0 +1 @@ +Hello, World!🌅 diff --git a/assets/one.txt b/assets/one.txt new file mode 100644 index 0000000..56a6051 --- /dev/null +++ b/assets/one.txt @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/codegen/src/lib.rs b/codegen/src/lib.rs index bbd9b73..f951947 100644 --- a/codegen/src/lib.rs +++ b/codegen/src/lib.rs @@ -20,12 +20,43 @@ use std::io::{Read, Seek}; use std::path::PathBuf; use std::str::{from_utf8, FromStr}; -use include_flate_compress::{apply_compression, CompressionMethod}; +use include_flate_compress::{apply_compression, compression_ratio, CompressionMethod}; use proc_macro::TokenStream; use proc_macro2::Span; use proc_macro_error::{emit_warning, proc_macro_error}; use quote::quote; -use syn::{Error, LitByteStr}; +use syn::{Error, LitByteStr, LitInt, Token}; + +/// This macro evaluates to `true` if the file should be compressed, `false` otherwise, at compile time. +/// Useful for conditional compilation without any runtime overhead. +/// +/// Please note that, contrary to what the macro name suggests, this macro does **not** actually compress the file. +/// +/// # Parameters +/// This macro accepts custom compression methods and threshold conditions. +/// +/// # Returns +/// This macro expands to a `bool` literal that indicates whether the file should be compressed. +/// If no condition is specified, this macro always returns `true`. 
+#[proc_macro] +#[proc_macro_error] +pub fn deflate_if(ts: TokenStream) -> TokenStream { + match deflate_if_inner(ts, false) { + Ok(ts) => ts.into(), + Err(err) => err.to_compile_error().into(), + } +} + +/// This macro is identical to `deflate_if!()`, except it additionally performs UTF-8 validation. +/// See `deflate_if!` for more details. +#[proc_macro] +#[proc_macro_error] +pub fn deflate_utf8_if(ts: TokenStream) -> TokenStream { + match deflate_if_inner(ts, true) { + Ok(ts) => ts.into(), + Err(err) => err.to_compile_error().into(), + } +} /// `deflate_file!("file")` is equivalent to `include_bytes!("file.gz")`. /// @@ -46,7 +77,7 @@ use syn::{Error, LitByteStr}; #[proc_macro] #[proc_macro_error] pub fn deflate_file(ts: TokenStream) -> TokenStream { - match inner(ts, false) { + match deflate_inner(ts, false) { Ok(ts) => ts.into(), Err(err) => err.to_compile_error().into(), } @@ -60,7 +91,7 @@ pub fn deflate_file(ts: TokenStream) -> TokenStream { #[proc_macro] #[proc_macro_error] pub fn deflate_utf8_file(ts: TokenStream) -> TokenStream { - match inner(ts, true) { + match deflate_inner(ts, true) { Ok(ts) => ts.into(), Err(err) => err.to_compile_error().into(), } @@ -72,55 +103,183 @@ pub fn deflate_utf8_file(ts: TokenStream) -> TokenStream { /// flate!(pub static DATA: [u8] from "assets/009f.dat"); // default, DEFLATE /// flate!(pub static DATA: [u8] from "assets/009f.dat" with zstd); // Use Zstd for this file spcifically /// flate!(pub static DATA: [u8] from "assets/009f.dat" with deflate); // Explicitly use DEFLATE. +/// +/// flate!(pub static DATA: [u8] from "assets/009f.dat" if always); // Always compress regardless of compression ratio. +/// flate!(pub static DATA: [u8] from "assets/009f.dat" if less_than_original); // Compress only if the compressed size is smaller than the original size. +/// flate!(pub static DATA: [u8] from "assets/009f.dat" if compression_ratio_more_than 10%); // Compress only if the compression ratio is higher than 10%. 
/// ``` struct FlateArgs { path: syn::LitStr, algorithm: Option, + threshold: Option, } impl syn::parse::Parse for FlateArgs { fn parse(input: syn::parse::ParseStream) -> syn::Result { let path = input.parse()?; - let algorithm = if input.is_empty() { - None - } else { + let mut algorithm = None; + let mut threshold = None; + + while !input.is_empty() { let lookahead = input.lookahead1(); - if lookahead.peek(kw::deflate) { - input.parse::()?; - Some(CompressionMethodTy(CompressionMethod::Deflate)) - } else if lookahead.peek(kw::zstd) { - input.parse::()?; - Some(CompressionMethodTy(CompressionMethod::Zstd)) + if lookahead.peek(kw::deflate) || lookahead.peek(kw::zstd) { + algorithm = if lookahead.peek(kw::deflate) { + input.parse::()?; + Some(CompressionMethodTy(CompressionMethod::Deflate)) + } else { + input.parse::()?; + Some(CompressionMethodTy(CompressionMethod::Zstd)) + }; + } else if lookahead.peek(kw::always) + || lookahead.peek(kw::less_than_original) + || (lookahead.peek(kw::compression_ratio_more_than) + && input.peek2(syn::LitInt) + && input.peek3(Token![%])) + { + threshold = Some(input.parse()?); } else { return Err(lookahead.error()); } - }; + } + + Ok(Self { + path, + algorithm, + threshold, + }) + } +} + +/// A threshold condition for compression. +enum ThresholdCondition { + /// Always compress regardless of compression ratio. + /// This is the default behaviour. + Always, + /// Compress only if the compressed size is smaller than the original size. + LessThanOriginal, + /// Compress only if the compression ratio is higher than the given threshold. 
+ CompressionRatioMoreThan(u64), +} + +impl syn::parse::Parse for ThresholdCondition { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let lookahead = input.lookahead1(); + if lookahead.peek(kw::always) { + input.parse::()?; + Ok(Self::Always) + } else if lookahead.peek(kw::less_than_original) { + input.parse::()?; + Ok(Self::LessThanOriginal) + } else if lookahead.peek(kw::compression_ratio_more_than) { + input.parse::()?; + let lit: LitInt = input.parse()?; + input.parse::()?; + Ok(Self::CompressionRatioMoreThan(lit.base10_parse()?)) + } else { + Err(lookahead.error()) + } + } +} - Ok(Self { path, algorithm }) +impl Into for ThresholdCondition { + fn into(self) -> u64 { + match self { + Self::Always => 0, + Self::LessThanOriginal => 100, + Self::CompressionRatioMoreThan(threshold) => threshold, + } } } +/// Custom keywords for the proc-macro. mod kw { + // `deflate` is a keyword that indicates that the file should be compressed with DEFLATE. syn::custom_keyword!(deflate); + // `zstd` is a keyword that indicates that the file should be compressed with Zstd. syn::custom_keyword!(zstd); + + // `always` is a keyword that indicates that the file should always be compressed. + syn::custom_keyword!(always); + // `less_than_original` is a keyword that indicates that the file should be compressed only if the compressed size is smaller than the original size. + syn::custom_keyword!(less_than_original); + // `compression_ratio_more_than` is a keyword that indicates that the file should be compressed only if the compression ratio is less than the given threshold. + // For example, `compression_ratio_more_than 10%` means that the file should be compressed only if the compressed size is less than 10% of the original size. 
+ syn::custom_keyword!(compression_ratio_more_than); } #[derive(Debug)] struct CompressionMethodTy(CompressionMethod); -fn compression_ratio(original_size: u64, compressed_size: u64) -> f64 { - (compressed_size as f64 / original_size as f64) * 100.0 +fn emap(error: E) -> Error { + Error::new(Span::call_site(), error) } -fn inner(ts: TokenStream, utf8: bool) -> syn::Result> { - fn emap(error: E) -> Error { - Error::new(Span::call_site(), error) +fn deflate_if_inner(ts: TokenStream, utf8: bool) -> syn::Result> { + let dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").map_err(emap)?); + + let args = syn::parse2::(ts.to_owned().into())?; + let path = PathBuf::from_str(&args.path.value()).map_err(emap)?; + let algo = args + .algorithm + .unwrap_or(CompressionMethodTy(CompressionMethod::Deflate)); + + if path.is_absolute() { + Err(emap("absolute paths are not supported"))?; } + let target = dir.join(&path); + let mut file = File::open(&target).map_err(emap)?; + let mut vec = Vec::::new(); + if utf8 { + std::io::copy(&mut file, &mut vec).map_err(emap)?; + from_utf8(&vec).map_err(emap)?; + } + + let mut compressed_buffer = Vec::::new(); + + { + let mut compressed_cursor = std::io::Cursor::new(&mut compressed_buffer); + let mut source: Box = if utf8 { + Box::new(std::io::Cursor::new(&vec)) + } else { + file.seek(std::io::SeekFrom::Start(0)).map_err(emap)?; + Box::new(&file) + }; + + apply_compression(&mut source, &mut compressed_cursor, algo.0).map_err(emap)?; + } + + let compression_ratio = compression_ratio( + fs::metadata(&target).map_err(emap)?.len(), + compressed_buffer.len() as u64, + ); + + // returns `true` if the file should be compressed, `false` otherwise. 
+ match args.threshold { + Some(ThresholdCondition::Always) => Ok(quote!(true)), + Some(ThresholdCondition::LessThanOriginal) => { + if compressed_buffer.len() > vec.len() { + Ok(quote!(false)) + } else { + Ok(quote!(true)) + } + } + Some(ThresholdCondition::CompressionRatioMoreThan(threshold)) => { + if compression_ratio > threshold as f64 { + Ok(quote!(false)) + } else { + Ok(quote!(true)) + } + } + _ => Ok(quote!(true)), + } +} + +fn deflate_inner(ts: TokenStream, utf8: bool) -> syn::Result> { let dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").map_err(emap)?); - let args: FlateArgs = syn::parse2::(ts.to_owned().into())?; + let args = syn::parse2::(ts.to_owned().into())?; let path = PathBuf::from_str(&args.path.value()).map_err(emap)?; let algo = args .algorithm @@ -131,9 +290,7 @@ fn inner(ts: TokenStream, utf8: bool) -> syn::Result> { } let target = dir.join(&path); - let mut file = File::open(&target).map_err(emap)?; - let mut vec = Vec::::new(); if utf8 { std::io::copy(&mut file, &mut vec).map_err(emap)?; @@ -145,7 +302,7 @@ fn inner(ts: TokenStream, utf8: bool) -> syn::Result> { { let mut compressed_cursor = std::io::Cursor::new(&mut compressed_buffer); let mut source: Box = if utf8 { - Box::new(std::io::Cursor::new(vec)) + Box::new(std::io::Cursor::new(&vec)) } else { file.seek(std::io::SeekFrom::Start(0)).map_err(emap)?; Box::new(&file) @@ -157,21 +314,24 @@ fn inner(ts: TokenStream, utf8: bool) -> syn::Result> { let bytes = LitByteStr::new(&compressed_buffer, Span::call_site()); let result = quote!(#bytes); + let compression_ratio = compression_ratio( + fs::metadata(&target).map_err(emap)?.len(), + compressed_buffer.len() as u64, + ); + + // Default to 10% threshold + let threshold: u64 = args.threshold.map_or(10, |cond| cond.into()); + #[cfg(not(feature = "no-compression-warnings"))] { - let compression_ratio = compression_ratio( - fs::metadata(&target).map_err(emap)?.len(), - compressed_buffer.len() as u64, - ); - - if compression_ratio < 
10.0f64 { + if compression_ratio < threshold as f64 { emit_warning!( - &args.path, - "Detected low compression ratio ({:.2}%) for file {:?} with `{:?}`. Consider using other compression methods.", - compression_ratio, - path.display(), - algo.0, - ); + &args.path, + "Detected low compression ratio ({:.2}%) for file {:?} with `{:?}`. Consider using other compression methods.", + compression_ratio, + path.display(), + algo.0, + ); } } diff --git a/compress/src/lib.rs b/compress/src/lib.rs index a4a0ffa..382e7c3 100644 --- a/compress/src/lib.rs +++ b/compress/src/lib.rs @@ -220,3 +220,7 @@ where io::copy(&mut decoder, writer)?; Ok(()) } + +pub fn compression_ratio(original_size: u64, compressed_size: u64) -> f64 { + (compressed_size as f64 / original_size as f64) * 100.0 +} diff --git a/examples/flate.rs b/examples/flate.rs new file mode 100644 index 0000000..d88e0d0 --- /dev/null +++ b/examples/flate.rs @@ -0,0 +1,22 @@ +// include-flate +// Copyright (C) SOFe, kkent030315 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use include_flate::flate; + +flate!(pub static HELLO_WORLD: str from "assets/hello-world.txt" with deflate if always); + +fn main() { + println!("{}", *HELLO_WORLD); +} diff --git a/src/lib.rs b/src/lib.rs index e0c3861..e842699 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,28 +96,43 @@ pub use once_cell::sync::Lazy; #[macro_export] macro_rules! flate { ($(#[$meta:meta])* - $(pub $(($($vis:tt)+))?)? 
static $name:ident: [u8] from $path:literal $(with $algo:ident)?) => { + $(pub $(($($vis:tt)+))?)? static $name:ident: [u8] from $path:literal $(with $algo:ident)? $(if $($threshold:tt)+)?) => { // HACK: workaround to make cargo auto rebuild on modification of source file const _: &'static [u8] = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/", $path)); $(#[$meta])* $(pub $(($($vis)+))?)? static $name: $crate::Lazy<::std::vec::Vec> = $crate::Lazy::new(|| { - $crate::decode($crate::codegen::deflate_file!($path), None) + // Evaluate the condition at compile time to avoid unnecessary runtime checks + if $crate::codegen::deflate_if!($path $($algo)? $($($threshold)+)?) { + let algo = match stringify!($($algo)?){ + "deflate" => $crate::CompressionMethod::Deflate, + "zstd" => $crate::CompressionMethod::Zstd, + _ => $crate::CompressionMethod::default(), + }; + $crate::decode($crate::codegen::deflate_file!($path $($algo)?), Some($crate::CompressionMethodTy(algo))) + } else { + include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/", $path)).to_vec() + } }); }; ($(#[$meta:meta])* - $(pub $(($($vis:tt)+))?)? static $name:ident: str from $path:literal $(with $algo:ident)?) => { + $(pub $(($($vis:tt)+))?)? static $name:ident: str from $path:literal $(with $algo:ident)? $(if $($threshold:tt)+)?) => { // HACK: workaround to make cargo auto rebuild on modification of source file const _: &'static str = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/", $path)); $(#[$meta])* $(pub $(($($vis)+))?)? 
static $name: $crate::Lazy<::std::string::String> = $crate::Lazy::new(|| { - let algo = match stringify!($($algo)?){ - "deflate" => $crate::CompressionMethod::Deflate, - "zstd" => $crate::CompressionMethod::Zstd, - _ => $crate::CompressionMethod::default(), - }; - $crate::decode_string($crate::codegen::deflate_utf8_file!($path $($algo)?), Some($crate::CompressionMethodTy(algo))) + // Evaluate the condition at compile time to avoid unnecessary runtime checks + if $crate::codegen::deflate_if!($path $($algo)? $($($threshold)+)?) { + let algo = match stringify!($($algo)?){ + "deflate" => $crate::CompressionMethod::Deflate, + "zstd" => $crate::CompressionMethod::Zstd, + _ => $crate::CompressionMethod::default(), + }; + $crate::decode_string($crate::codegen::deflate_utf8_file!($path $($algo)?), Some($crate::CompressionMethodTy(algo))) + } else { + include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/", $path)).to_string() + } }); }; } diff --git a/test_util.rs b/test_util.rs index 9376ae4..cb405ad 100644 --- a/test_util.rs +++ b/test_util.rs @@ -70,3 +70,16 @@ pub fn verify_str(name: &str, data: &str) { data ); } + +/// Decompress with the provided method and verify the result +pub fn verify_with(name: &str, compressed_data: &[u8], method: CompressionMethod) { + let mut decompressed_buffer = Vec::new(); + { + let mut compressed_cursor = std::io::Cursor::new(compressed_data); + let mut decompressed_cursor = std::io::Cursor::new(&mut decompressed_buffer); + apply_decompression(&mut compressed_cursor, &mut decompressed_cursor, method).unwrap(); + decompressed_cursor.seek(SeekFrom::Start(0)).unwrap(); // Reset cursor position + } + assert_ne!(compressed_data, decompressed_buffer.as_slice()); + assert_eq!(read_file(name), decompressed_buffer.as_slice()); +} diff --git a/tests/deflate-if.rs b/tests/deflate-if.rs new file mode 100644 index 0000000..eeac599 --- /dev/null +++ b/tests/deflate-if.rs @@ -0,0 +1,62 @@ +// include-flate +// Copyright (C) SOFe, kkent030315 +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +include!("../test_util.rs"); + +use include_flate::flate; +use include_flate_codegen::deflate_if; + +// `assets/one.txt` is a file containing the string `1`. +// By the nature of compression, small inputs will likely be larger after compression. + +flate!(pub static ONE_ALWAYS: [u8] from "assets/one.txt" if always); +flate!(pub static ONE_DEFLATE_ALWAYS: [u8] from "assets/one.txt" with deflate if always); +flate!(pub static ONE_ZSTD_ALWAYS: [u8] from "assets/one.txt" with zstd if always); + +flate!(pub static ONE_LESS_THAN_ORIGINAL: [u8] from "assets/one.txt" if less_than_original); +flate!(pub static ONE_DEFLATE_LESS_THAN_ORIGINAL: [u8] from "assets/one.txt" with deflate if less_than_original); +flate!(pub static ONE_ZSTD_LESS_THAN_ORIGINAL: [u8] from "assets/one.txt" with zstd if less_than_original); + +#[test] +fn test() { + verify("one.txt", &ONE_ALWAYS); + verify("one.txt", &ONE_DEFLATE_ALWAYS); + verify("one.txt", &ONE_ZSTD_ALWAYS); + + verify("one.txt", &ONE_LESS_THAN_ORIGINAL); + verify("one.txt", &ONE_DEFLATE_LESS_THAN_ORIGINAL); + verify("one.txt", &ONE_ZSTD_LESS_THAN_ORIGINAL); + + assert_eq!(deflate_if!("assets/one.txt" zstd always), true); + assert_eq!(deflate_if!("assets/one.txt" deflate always), true); + + // The compressed data is larger than the original data (as expected), + // so it should not be deflated. 
+ assert_eq!(deflate_if!("assets/one.txt" zstd less_than_original), false); + assert_eq!( + deflate_if!("assets/one.txt" deflate less_than_original), + false + ); + + // The compressed data is bigger than the original data (as expected), + assert_eq!( + deflate_if!("assets/one.txt" zstd compression_ratio_more_than 10%), + false + ); + assert_eq!( + deflate_if!("assets/one.txt" deflate compression_ratio_more_than 10%), + false + ); +} diff --git a/tests/syntax.rs b/tests/syntax.rs new file mode 100644 index 0000000..314b21c --- /dev/null +++ b/tests/syntax.rs @@ -0,0 +1,32 @@ +// include-flate +// Copyright (C) SOFe, kkent030315 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use include_flate::flate; + +flate!(pub static DATA1: [u8] from "assets/random.dat" with zstd if always); +flate!(pub static DATA2: [u8] from "assets/random.dat" with deflate if less_than_original); +flate!(pub static DATA3: [u8] from "assets/random.dat" with deflate if compression_ratio_more_than 0%); + +flate!(pub static DATA4: [u8] from "assets/random.dat" if always); +flate!(pub static DATA5: [u8] from "assets/random.dat" if less_than_original); +flate!(pub static DATA6: [u8] from "assets/random.dat" if compression_ratio_more_than 0%); + +flate!(pub static DATA7: str from "assets/chinese.txt" with zstd if always); +flate!(pub static DATA8: str from "assets/chinese.txt" with deflate if less_than_original); +flate!(pub static DATA9: str from "assets/chinese.txt" with deflate if compression_ratio_more_than 0%); + +flate!(pub static DATA10: str from "assets/chinese.txt" if always); +flate!(pub static DATA11: str from "assets/chinese.txt" if less_than_original); +flate!(pub static DATA12: str from "assets/chinese.txt" if compression_ratio_more_than 0%); diff --git a/tests/with-compress.rs b/tests/with-compress.rs new file mode 100644 index 0000000..6560cab --- /dev/null +++ b/tests/with-compress.rs @@ -0,0 +1,44 @@ +// include-flate +// Copyright (C) SOFe, kkent030315 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +include!("../test_util.rs"); + +use include_flate::flate; +use include_flate_codegen::deflate_file; + +// Compression method is defaulted to deflate. +flate!(pub static DATA_DEFLATE1: [u8] from "assets/random.dat"); + +flate!(pub static DATA_DEFLATE2: [u8] from "assets/random.dat" with deflate); +flate!(pub static DATA_ZSTD: [u8] from "assets/random.dat" with zstd); + +#[test] +fn test() { + verify_with( + "random.dat", + deflate_file!("assets/random.dat"), + CompressionMethod::Deflate, + ); + verify_with( + "random.dat", + deflate_file!("assets/random.dat" deflate), + CompressionMethod::Deflate, + ); + verify_with( + "random.dat", + deflate_file!("assets/random.dat" zstd), + CompressionMethod::Zstd, + ); +}