Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified crates/resvg/tests/tests/text/direction/rtl.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
109 changes: 87 additions & 22 deletions crates/usvg/src/text/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1336,7 +1336,7 @@ pub(crate) fn shape_text(
None => break 'outer,
};

// Shape again, using a new font.
// Shape the whole text again, using the new font.
let fallback_glyphs = shape_text_with_font(
text,
fallback_font.clone(),
Expand All @@ -1349,27 +1349,10 @@ pub(crate) fn shape_text(
)
.unwrap_or_default();

let all_matched = fallback_glyphs.iter().all(|g| !g.is_missing());
if all_matched {
// Replace all glyphs when all of them were matched.
glyphs = fallback_glyphs;
break 'outer;
}

// We assume, that shaping with an any font will produce the same amount of glyphs.
// This is incorrect, but good enough for now.
if glyphs.len() != fallback_glyphs.len() {
break 'outer;
}

// TODO: Replace clusters and not glyphs. This should be more accurate.

// Copy new glyphs.
for i in 0..glyphs.len() {
if glyphs[i].is_missing() && !fallback_glyphs[i].is_missing() {
glyphs[i] = fallback_glyphs[i].clone();
}
}
// Merge the newly shaped glyphs into the current ones, replacing
// every text cluster that is still missing and that the fallback
// font is able to resolve.
merge_fallback_glyphs(&mut glyphs, &fallback_glyphs, text);

// Remember this font.
used_fonts.push(fallback_font.id);
Expand All @@ -1394,6 +1377,88 @@ pub(crate) fn shape_text(
glyphs
}

/// Merges fallback glyphs into the base glyphs.
///
/// Both `base` and `fallback` are the result of shaping the same `text` with two
/// different fonts. Every text cluster that is still missing (`.notdef`) in
/// `base` is replaced with the corresponding glyphs from `fallback`, but only if
/// the fallback font is able to resolve that whole cluster.
///
/// The two shapings can disagree on cluster boundaries. The most important case
/// is multi-codepoint emoji (flags and other ZWJ sequences): the primary font
/// produces one `.notdef` glyph per codepoint, while the emoji font ligates the
/// whole sequence into a single glyph. To merge them correctly the text is cut
/// only at boundaries shared by *both* shapings and whole segments are replaced
/// at a time, instead of aligning the two glyph lists one by one (which fails as
/// soon as they have a different length).
///
/// `base` is left untouched when `fallback` is empty or when `base` contains no
/// missing glyphs. Otherwise it is overwritten with the merged glyph list.
fn merge_fallback_glyphs(base: &mut Vec<Glyph>, fallback: &[Glyph], text: &str) {
    if fallback.is_empty() || base.iter().all(|g| !g.is_missing()) {
        return;
    }

    // Byte positions at which a cluster starts. A position that is a cluster
    // boundary in *both* shapings can be used to splice glyphs without ever
    // splitting a ligature. The start and the end of the text are always
    // treated as boundaries.
    let base_bounds: HashSet<usize> = base.iter().map(|g| g.byte_idx.value()).collect();
    let mut bounds: Vec<usize> = fallback
        .iter()
        .map(|g| g.byte_idx.value())
        .filter(|b| base_bounds.contains(b))
        .collect();
    bounds.push(0);
    bounds.push(text.len());
    bounds.sort_unstable();
    bounds.dedup();

    // Returns the `[start, end)` shared-boundary segment that `byte` falls into.
    //
    // `bounds` is sorted and deduplicated, so a binary search finds the first
    // boundary strictly greater than `byte`; the boundary right before it is
    // the segment start.
    let segment_of = |byte: usize| -> (usize, usize) {
        let next = bounds.partition_point(|&b| b <= byte);
        let start = next.checked_sub(1).map_or(0, |prev| bounds[prev]);
        let end = bounds.get(next).copied().unwrap_or(text.len());
        (start, end)
    };

    // Resolve the segment of every glyph exactly once, so the loops below only
    // compare precomputed values.
    let base_segments: Vec<(usize, usize)> = base
        .iter()
        .map(|g| segment_of(g.byte_idx.value()))
        .collect();
    let fallback_segments: Vec<(usize, usize)> = fallback
        .iter()
        .map(|g| segment_of(g.byte_idx.value()))
        .collect();

    let mut result = Vec::with_capacity(base.len());
    let mut i = 0;
    while i < base.len() {
        let segment = base_segments[i];

        // Collect the whole run of adjacent base glyphs belonging to this
        // segment. This relies on the glyphs of one segment being adjacent in
        // the glyph list.
        let run_start = i;
        while i < base.len() && base_segments[i] == segment {
            i += 1;
        }
        let base_run = &base[run_start..i];

        if base_run.iter().any(|g| g.is_missing()) {
            // Fallback glyphs of the same segment, kept in fallback order.
            let fallback_run: Vec<&Glyph> = fallback
                .iter()
                .zip(&fallback_segments)
                .filter(|&(_, seg)| *seg == segment)
                .map(|(g, _)| g)
                .collect();

            // Only replace the segment if the fallback font resolved all of it.
            // Glyphs are cloned only once we know they will be used.
            if !fallback_run.is_empty() && fallback_run.iter().all(|g| !g.is_missing()) {
                result.extend(fallback_run.into_iter().cloned());
                continue;
            }
        }

        // Either nothing was missing or the fallback font could not help:
        // keep the base glyphs as they are.
        result.extend_from_slice(base_run);
    }

    *base = result;
}

/// Converts a text into a list of glyph IDs.
///
/// This function will do the BIDI reordering and text shaping.
Expand Down
69 changes: 69 additions & 0 deletions crates/usvg/tests/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,3 +601,72 @@ fn flattened_text_should_inherit_absolute_transform() {
path.abs_bounding_box()
);
}

#[test]
fn compound_emoji_font_fallback() {
    // https://github.com/linebender/resvg/issues/861
    //
    // A compound (ZWJ-joined) emoji that is not present in the primary font must
    // be resolved through font fallback. This is tricky because the fallback
    // font shapes the multi-codepoint sequence into a single ligated glyph,
    // while the primary font produces one (`.notdef`) glyph per codepoint. The
    // old merging logic bailed out whenever the two shapings had a different
    // number of glyphs, which dropped the emoji entirely.
    //
    // U+1F3F3 U+FE0F U+200D U+1F308 is the "rainbow flag" emoji.
    let svg = "
    <svg viewBox='0 0 200 200' xmlns='http://www.w3.org/2000/svg' font-size='20'>
        <text x='10' y='100'>Hi\u{1F3F3}\u{FE0F}\u{200D}\u{1F308}there</text>
    </svg>
    ";

    // Load exactly two fonts so that the fallback target is deterministic:
    // Latin text comes from Noto Sans, the emoji can only come from Twitter
    // Color Emoji.
    let fonts_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../resvg/tests/fonts");
    let mut opts = usvg::Options::default();
    for font_file in ["NotoSans-Regular.ttf", "TwitterColorEmoji.subset.ttf"] {
        opts.fontdb_mut()
            .load_font_file(fonts_dir.join(font_file))
            .unwrap_or_else(|e| panic!("failed to load font {font_file}: {e}"));
    }
    opts.font_family = "Noto Sans".to_string();

    let tree = usvg::Tree::from_str(svg, &opts).unwrap();

    let usvg::Node::Text(text) = &tree.root().children()[0] else {
        unreachable!()
    };

    let glyphs: Vec<_> = text
        .layouted()
        .iter()
        .flat_map(|span| span.positioned_glyphs.iter())
        .collect();

    // Human-readable dump of the laid out glyphs, used in failure messages.
    let describe = || {
        glyphs
            .iter()
            .map(|g| (g.id.0, g.text.clone()))
            .collect::<Vec<_>>()
    };

    // Guard against an empty layout first: the `.all()` check below would pass
    // vacuously and the failure would otherwise surface as an opaque
    // out-of-bounds index panic.
    let Some(first_glyph) = glyphs.first() else {
        panic!("text layout produced no glyphs");
    };

    // No glyph may be `.notdef` (glyph id 0): the emoji must be resolved, not
    // dropped.
    assert!(
        glyphs.iter().all(|g| g.id.0 != 0),
        "text contains unresolved (.notdef) glyphs: {:?}",
        describe()
    );

    // The Latin text is shaped with the primary font; the whole emoji sequence
    // must collapse into a single ligated glyph taken from the fallback font.
    let primary_font = first_glyph.font;
    let fallback_glyph_count = glyphs.iter().filter(|g| g.font != primary_font).count();
    assert_eq!(
        fallback_glyph_count,
        1,
        "expected the ZWJ emoji to be a single ligated glyph from the fallback font, got {:?}",
        describe()
    );
}
Loading