diff --git a/crates/resvg/tests/tests/text/direction/rtl.png b/crates/resvg/tests/tests/text/direction/rtl.png
index 1d0416d25..ef52b109b 100644
Binary files a/crates/resvg/tests/tests/text/direction/rtl.png and b/crates/resvg/tests/tests/text/direction/rtl.png differ
diff --git a/crates/usvg/src/text/layout.rs b/crates/usvg/src/text/layout.rs
index 77e37cc90..05db66731 100644
--- a/crates/usvg/src/text/layout.rs
+++ b/crates/usvg/src/text/layout.rs
@@ -1336,7 +1336,7 @@ pub(crate) fn shape_text(
                 None => break 'outer,
             };
 
-            // Shape again, using a new font.
+            // Shape the whole text again, using the new font.
             let fallback_glyphs = shape_text_with_font(
                 text,
                 fallback_font.clone(),
@@ -1349,27 +1349,10 @@ pub(crate) fn shape_text(
             )
             .unwrap_or_default();
 
-            let all_matched = fallback_glyphs.iter().all(|g| !g.is_missing());
-            if all_matched {
-                // Replace all glyphs when all of them were matched.
-                glyphs = fallback_glyphs;
-                break 'outer;
-            }
-
-            // We assume, that shaping with an any font will produce the same amount of glyphs.
-            // This is incorrect, but good enough for now.
-            if glyphs.len() != fallback_glyphs.len() {
-                break 'outer;
-            }
-
-            // TODO: Replace clusters and not glyphs. This should be more accurate.
-
-            // Copy new glyphs.
-            for i in 0..glyphs.len() {
-                if glyphs[i].is_missing() && !fallback_glyphs[i].is_missing() {
-                    glyphs[i] = fallback_glyphs[i].clone();
-                }
-            }
+            // Merge the newly shaped glyphs into the current ones, replacing
+            // every text cluster that is still missing and that the fallback
+            // font is able to resolve.
+            merge_fallback_glyphs(&mut glyphs, &fallback_glyphs, text);
 
             // Remember this font.
             used_fonts.push(fallback_font.id);
@@ -1394,6 +1377,88 @@ pub(crate) fn shape_text(
     glyphs
 }
 
+/// Merges fallback glyphs into the base glyphs.
+///
+/// Both `base` and `fallback` are the result of shaping the same `text` with two
+/// different fonts. Every text cluster that is still missing (`.notdef`) in
+/// `base` is replaced with the corresponding glyphs from `fallback`, but only if
+/// the fallback font is able to resolve that whole cluster.
+///
+/// The two shapings can disagree on cluster boundaries. The most important cases
+/// are multi-codepoint emoji (flags and other ZWJ sequences): the primary font
+/// produces one `.notdef` glyph per codepoint, while the emoji font ligates the
+/// whole sequence into a single glyph. To merge them correctly we cut the text
+/// only at boundaries shared by *both* shapings and replace whole clusters at a
+/// time, instead of trying to align the two glyph lists one by one (which fails
+/// as soon as they have a different length).
+fn merge_fallback_glyphs(base: &mut Vec<Glyph>, fallback: &[Glyph], text: &str) {
+    if fallback.is_empty() || base.iter().all(|g| !g.is_missing()) {
+        return;
+    }
+
+    // Byte positions at which a cluster starts. A position that is a cluster
+    // boundary in *both* shapings can be used to splice glyphs without ever
+    // splitting a ligature.
+    let base_bounds: HashSet<usize> = base.iter().map(|g| g.byte_idx.value()).collect();
+    let mut bounds: Vec<usize> = fallback
+        .iter()
+        .map(|g| g.byte_idx.value())
+        .filter(|b| base_bounds.contains(b))
+        .collect();
+    bounds.push(0);
+    bounds.push(text.len());
+    bounds.sort_unstable();
+    bounds.dedup();
+
+    // Returns the `[start, end)` shared-boundary segment that `byte` falls into.
+    let segment_of = |byte: usize| -> (usize, usize) {
+        let start = bounds
+            .iter()
+            .rev()
+            .copied()
+            .find(|&b| b <= byte)
+            .unwrap_or(0);
+        let end = bounds
+            .iter()
+            .copied()
+            .find(|&b| b > byte)
+            .unwrap_or(text.len());
+        (start, end)
+    };
+
+    let mut result = Vec::with_capacity(base.len());
+    let mut i = 0;
+    while i < base.len() {
+        let segment = segment_of(base[i].byte_idx.value());
+
+        // Collect the whole run of base glyphs belonging to this segment. The
+        // glyphs of a single cluster are always adjacent (in both LTR and RTL
+        // visual order), so this run is contiguous.
+        let run_start = i;
+        while i < base.len() && segment_of(base[i].byte_idx.value()) == segment {
+            i += 1;
+        }
+
+        if base[run_start..i].iter().any(|g| g.is_missing()) {
+            let fallback_run: Vec<Glyph> = fallback
+                .iter()
+                .filter(|g| segment_of(g.byte_idx.value()) == segment)
+                .cloned()
+                .collect();
+
+            // Only replace the cluster if the fallback font resolved all of it.
+            if !fallback_run.is_empty() && fallback_run.iter().all(|g| !g.is_missing()) {
+                result.extend(fallback_run);
+                continue;
+            }
+        }
+
+        result.extend_from_slice(&base[run_start..i]);
+    }
+
+    *base = result;
+}
+
 /// Converts a text into a list of glyph IDs.
 ///
 /// This function will do the BIDI reordering and text shaping.
diff --git a/crates/usvg/tests/parser.rs b/crates/usvg/tests/parser.rs
index 905bbc0db..481e2d15e 100644
--- a/crates/usvg/tests/parser.rs
+++ b/crates/usvg/tests/parser.rs
@@ -601,3 +601,73 @@ fn flattened_text_should_inherit_absolute_transform() {
         path.abs_bounding_box()
     );
 }
+
+#[test]
+fn compound_emoji_font_fallback() {
+    // https://github.com/linebender/resvg/issues/861
+    //
+    // A compound (ZWJ-joined) emoji that is not present in the primary font must
+    // be resolved through font fallback. This is tricky because the fallback
+    // font shapes the multi-codepoint sequence into a single ligated glyph,
+    // while the primary font produces one (`.notdef`) glyph per codepoint. The
+    // old merging logic bailed out whenever the two shapings had a different
+    // number of glyphs, which dropped the emoji entirely.
+    //
+    // U+1F3F3 U+FE0F U+200D U+1F308 is the "rainbow flag" emoji.
+    // NOTE(review): SVG markup reconstructed after tag stripping; confirm attributes.
+    let svg = "
+    <svg width='200' height='100' xmlns='http://www.w3.org/2000/svg'>
+        <text x='10' y='50' font-family='Noto Sans' font-size='32'>Hi\u{1F3F3}\u{FE0F}\u{200D}\u{1F308}there</text>
+    </svg>
+    ";
+
+    // Load exactly two fonts so that the fallback target is deterministic:
+    // Latin text comes from Noto Sans, the emoji can only come from Twitter
+    // Color Emoji.
+    let fonts_dir = env!("CARGO_MANIFEST_DIR").to_string() + "/../resvg/tests/fonts";
+    let mut opts = usvg::Options::default();
+    opts.fontdb_mut()
+        .load_font_file(format!("{fonts_dir}/NotoSans-Regular.ttf"))
+        .unwrap();
+    opts.fontdb_mut()
+        .load_font_file(format!("{fonts_dir}/TwitterColorEmoji.subset.ttf"))
+        .unwrap();
+    opts.font_family = "Noto Sans".to_string();
+
+    let tree = usvg::Tree::from_str(svg, &opts).unwrap();
+
+    let usvg::Node::Text(text) = &tree.root().children()[0] else {
+        unreachable!()
+    };
+
+    let glyphs: Vec<_> = text
+        .layouted()
+        .iter()
+        .flat_map(|span| span.positioned_glyphs.iter())
+        .collect();
+
+    // No glyph may be `.notdef` (glyph id 0): the emoji must be resolved, not
+    // dropped.
+    assert!(
+        glyphs.iter().all(|g| g.id.0 != 0),
+        "text contains unresolved (.notdef) glyphs: {:?}",
+        glyphs
+            .iter()
+            .map(|g| (g.id.0, g.text.clone()))
+            .collect::<Vec<_>>()
+    );
+
+    // The Latin text is shaped with the primary font; the whole emoji sequence
+    // must collapse into a single ligated glyph taken from the fallback font.
+    let primary_font = glyphs[0].font;
+    let fallback_glyphs: Vec<_> = glyphs.iter().filter(|g| g.font != primary_font).collect();
+    assert_eq!(
+        fallback_glyphs.len(),
+        1,
+        "expected the ZWJ emoji to be a single ligated glyph from the fallback font, got {:?}",
+        glyphs
+            .iter()
+            .map(|g| (g.id.0, g.text.clone()))
+            .collect::<Vec<_>>()
+    );
+}