diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1d1d5a8154a7..fe6af506c792 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -323,6 +323,8 @@ Optimizations * GITHUB#16283: Use Panama Vector API to SIMD-evaluate fixed-cardinality sorted numeric range queries in rangeIntoBitSet. (Costin Leau) +* GITHUB#16268: Use the doc-values skip index to skip per-doc value lookups in LongRangeFacetCutter. (Jakub Slowinski) + Bug Fixes --------------------- diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/IntervalTracker.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/IntervalTracker.java index f3b11f56296f..f36c18bd54c8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/IntervalTracker.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/IntervalTracker.java @@ -36,6 +36,11 @@ interface IntervalTracker extends OrdinalIterator { /** clear recorded information on this tracker. 
* */ void clear(); + /** + * restart reading from the first recorded ordinal, to replay a {@link #freeze() frozen} tracker + */ + void rewind(); + /** check if any data for the interval has been recorded * */ boolean get(int index); @@ -71,6 +76,12 @@ public void clear() { intervalsWithHit = 0; } + @Override + public void rewind() { + bitFrom = 0; + trackerState = 0; + } + @Override public boolean get(int index) { return tracker.get(index); diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java index b9518bfca154..07e01275c54b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java @@ -23,6 +23,10 @@ import org.apache.lucene.facet.MultiLongValues; import org.apache.lucene.facet.MultiLongValuesSource; import org.apache.lucene.facet.range.LongRange; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesSkipper; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.sandbox.facet.cutters.FacetCutter; import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter; import org.apache.lucene.search.LongValues; @@ -42,6 +46,10 @@ public abstract class LongRangeFacetCutter implements FacetCutter { // TODO: refactor - weird that we have both multi and single here. final LongValuesSource singleValues; + + // Field faceted by name, whose skip index is used when present, or null when faceting a source. 
+ final String fieldName; + final LongRangeAndPos[] sortedRanges; final int requestedRangeCount; @@ -62,17 +70,34 @@ static LongRangeFacetCutter createSingleOrMultiValued( MultiLongValuesSource longValuesSource, LongValuesSource singleLongValuesSource, LongRange[] longRanges) { + return createSingleOrMultiValued(longValuesSource, singleLongValuesSource, longRanges, null); + } + + /** Like the three-argument overload, but uses the {@code fieldName} skip index when present. */ + static LongRangeFacetCutter createSingleOrMultiValued( + MultiLongValuesSource longValuesSource, + LongValuesSource singleLongValuesSource, + LongRange[] longRanges, + String fieldName) { if (areOverlappingRanges(longRanges)) { return new OverlappingLongRangeFacetCutter( - longValuesSource, singleLongValuesSource, longRanges); + longValuesSource, singleLongValuesSource, longRanges, fieldName); } return new NonOverlappingLongRangeFacetCutter( - longValuesSource, singleLongValuesSource, longRanges); + longValuesSource, singleLongValuesSource, longRanges, fieldName); } public static LongRangeFacetCutter create( MultiLongValuesSource longValuesSource, LongRange[] longRanges) { - return createSingleOrMultiValued(longValuesSource, null, longRanges); + return createSingleOrMultiValued(longValuesSource, null, longRanges, null); + } + + /** Create {@link FacetCutter} for a long field by name, using its skip index when present. */ + public static LongRangeFacetCutter create(String field, LongRange[] longRanges) { + // Leave the single-valued source null. The skip path reads the field directly, and a + // multi-valued segment must fall back to the multi-valued leaf cutter. 
+ return createSingleOrMultiValued( + MultiLongValuesSource.fromLongField(field), null, longRanges, field); } // caller handles conversion of Doubles and DoubleRange to Long and LongRange @@ -80,7 +105,8 @@ public static LongRangeFacetCutter create( LongRangeFacetCutter( MultiLongValuesSource longValuesSource, LongValuesSource singleLongValuesSource, - LongRange[] longRanges) { + LongRange[] longRanges, + String fieldName) { super(); valuesSource = longValuesSource; if (singleLongValuesSource != null) { @@ -88,6 +114,7 @@ public static LongRangeFacetCutter create( } else { singleValues = MultiLongValuesSource.unwrapSingleton(valuesSource); } + this.fieldName = fieldName; sortedRanges = new LongRangeAndPos[longRanges.length]; requestedRangeCount = longRanges.length; @@ -124,6 +151,32 @@ public static LongRangeFacetCutter create( */ abstract List buildElementaryIntervals(); + /** + * Single-valued {@link NumericDocValues} for {@link #fieldName} in this segment, or null when no + * field is configured or some doc in this segment has more than one value. + */ + final NumericDocValues singletonFieldValues(LeafReaderContext context) throws IOException { + if (fieldName == null) { + return null; + } + return DocValues.unwrapSingleton(DocValues.getSortedNumeric(context.reader(), fieldName)); + } + + /** Wraps {@link NumericDocValues} as {@link LongValues}. */ + static LongValues asLongValues(NumericDocValues values) { + return new LongValues() { + @Override + public long longValue() throws IOException { + return values.longValue(); + } + + @Override + public boolean advanceExact(int doc) throws IOException { + return values.advanceExact(doc); + } + }; + } + private static boolean areOverlappingRanges(LongRange[] ranges) { if (ranges.length == 0) { return false; @@ -149,6 +202,76 @@ private static boolean areOverlappingRanges(LongRange[] ranges) { return false; } + // Returns the elementary interval that v belongs to, binary-searching boundaries from lo onwards. 
+ static int toElementaryInterval(long[] boundaries, long v, int lo) { + int hi = boundaries.length - 1; + int lowerBound = lo; + + while (true) { + int mid = (lo + hi) >>> 1; + if (v <= boundaries[mid]) { + if (mid == lowerBound) { + return mid; + } else { + hi = mid - 1; + } + } else if (v > boundaries[mid + 1]) { + lo = mid + 1; + } else { + return mid + 1; + } + } + } + + /** Cached skipper decision for a leaf cutter, shared by the single- and multi-valued cutters. */ + static final class SkipBlock { + private final DocValuesSkipper skipper; + private final long[] boundaries; + + // Cached decision from advance, valid for every doc up to (and including) upToInclusive. + int upToInclusive = -1; + // Whether every value in the block maps to the single interval upToIntervalOrd. + boolean upToSameInterval; + // Whether every doc in the block has a value. + boolean upToDense; + int upToIntervalOrd; + + SkipBlock(DocValuesSkipper skipper, long[] boundaries) { + this.skipper = skipper; + this.boundaries = boundaries; + } + + void advance(int doc) throws IOException { + if (doc > skipper.maxDocID(0)) { + skipper.advance(doc); + } + upToSameInterval = false; + + if (skipper.minDocID(0) > doc) { + // Corner case which happens if doc doesn't have a value and is between two intervals of the + // skip index. Fall back to per-doc lookups until the next block. + upToInclusive = skipper.minDocID(0) - 1; + return; + } + + upToInclusive = skipper.maxDocID(0); + // Climb to the highest level that still maps to a single interval. + for (int level = 0; level < skipper.numLevels(); ++level) { + // Long fields store raw values, skipper's min/max maps straight into the boundary space. 
+ int minInterval = toElementaryInterval(boundaries, skipper.minValue(level), 0); + int maxInterval = toElementaryInterval(boundaries, skipper.maxValue(level), 0); + if (minInterval != maxInterval) { + break; + } + upToInclusive = skipper.maxDocID(level); + upToSameInterval = true; + upToIntervalOrd = minInterval; + int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1; + upToDense = skipper.docCount(level) == totalDocsAtLevel; + } + } + } + abstract static class LongRangeMultivaluedLeafFacetCutter implements LeafFacetCutter { private final MultiLongValues multiLongValues; private final long[] boundaries; @@ -159,36 +282,53 @@ abstract static class LongRangeMultivaluedLeafFacetCutter implements LeafFacetCu // exclusive ranges. IntervalTracker requestedIntervalTracker; + // Non-null when the field has a skip index. + private final SkipBlock skipBlock; + LongRangeMultivaluedLeafFacetCutter(MultiLongValues longValues, long[] boundaries, int[] pos) { + this(longValues, boundaries, pos, null); + } + + LongRangeMultivaluedLeafFacetCutter( + MultiLongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) { this.multiLongValues = longValues; this.boundaries = boundaries; this.pos = pos; + this.skipBlock = skipper == null ? 
null : new SkipBlock(skipper, boundaries); elementaryIntervalTracker = new IntervalTracker.MultiIntervalTracker(boundaries.length); } @Override public boolean advanceExact(int doc) throws IOException { - if (multiLongValues.advanceExact(doc) == false) { - return false; + if (skipBlock != null && doc > skipBlock.upToInclusive) { + skipBlock.advance(doc); } elementaryIntervalTracker.clear(); - if (requestedIntervalTracker != null) { requestedIntervalTracker.clear(); } - long numValues = multiLongValues.getValueCount(); - - int lastIntervalSeen = -1; - - for (int i = 0; i < numValues; i++) { - lastIntervalSeen = processValue(multiLongValues.nextValue(), lastIntervalSeen); - assert lastIntervalSeen >= 0 && lastIntervalSeen < boundaries.length; - elementaryIntervalTracker.set(lastIntervalSeen); - if (lastIntervalSeen == boundaries.length - 1) { - // we've already reached the end of all possible intervals for this doc - break; + if (skipBlock != null && skipBlock.upToSameInterval) { + // Reuse the cached ordinal, skipping the binary search. A dense block also skips the value + // lookup, a sparse one still needs advanceExact to know whether this doc has a value. 
+ if (skipBlock.upToDense == false && multiLongValues.advanceExact(doc) == false) { + return false; + } + elementaryIntervalTracker.set(skipBlock.upToIntervalOrd); + } else { + if (multiLongValues.advanceExact(doc) == false) { + return false; + } + long numValues = multiLongValues.getValueCount(); + int lastIntervalSeen = -1; + for (int i = 0; i < numValues; i++) { + lastIntervalSeen = processValue(multiLongValues.nextValue(), lastIntervalSeen); + assert lastIntervalSeen >= 0 && lastIntervalSeen < boundaries.length; + elementaryIntervalTracker.set(lastIntervalSeen); + if (lastIntervalSeen == boundaries.length - 1) { + break; + } } } maybeRollUp(requestedIntervalTracker); @@ -205,7 +345,7 @@ public boolean advanceExact(int doc) throws IOException { // Returns the value of the interval v belongs or lastIntervalSeen // if no processing is done, it returns the lastIntervalSeen private int processValue(long v, int lastIntervalSeen) { - int lo = 0, hi = boundaries.length - 1; + int lo = 0; if (lastIntervalSeen != -1) { // this is the multivalued doc case, we need to set lo correctly @@ -223,22 +363,8 @@ private int processValue(long v, int lastIntervalSeen) { return lastIntervalSeen; } } - int lowerBound = lo; - - while (true) { - int mid = (lo + hi) >>> 1; - if (v <= boundaries[mid]) { - if (mid == lowerBound) { - return mid; - } else { - hi = mid - 1; - } - } else if (v > boundaries[mid + 1]) { - lo = mid + 1; - } else { - return mid + 1; - } - } + + return toElementaryInterval(boundaries, v, lo); } void maybeRollUp(IntervalTracker rollUpInto) {} @@ -252,50 +378,62 @@ abstract static class LongRangeSingleValuedLeafFacetCutter implements LeafFacetC IntervalTracker requestedIntervalTracker; + // Non-null when the field has a skip index. + private final SkipBlock skipBlock; + + // Interval of the previous doc with a value, for replaying the tracker on a repeat. 
+ private int previousIntervalOrd = -1; + LongRangeSingleValuedLeafFacetCutter(LongValues longValues, long[] boundaries, int[] pos) { + this(longValues, boundaries, pos, null); + } + + LongRangeSingleValuedLeafFacetCutter( + LongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) { this.longValues = longValues; this.boundaries = boundaries; this.pos = pos; + this.skipBlock = skipper == null ? null : new SkipBlock(skipper, boundaries); } @Override public boolean advanceExact(int doc) throws IOException { - if (longValues.advanceExact(doc) == false) { - return false; + if (skipBlock != null && doc > skipBlock.upToInclusive) { + skipBlock.advance(doc); } - if (requestedIntervalTracker != null) { - requestedIntervalTracker.clear(); + + int intervalOrd; + if (skipBlock != null && skipBlock.upToSameInterval) { + // Reuse the cached ordinal, skipping the binary search. A dense block also skips the value + // lookup, a sparse one still needs advanceExact to know whether this doc has a value. + if (skipBlock.upToDense == false && longValues.advanceExact(doc) == false) { + return false; + } + intervalOrd = skipBlock.upToIntervalOrd; + } else if (longValues.advanceExact(doc)) { + intervalOrd = processValue(longValues.longValue()); + } else { + return false; } - elementaryIntervalOrd = processValue(longValues.longValue()); - maybeRollUp(requestedIntervalTracker); + + elementaryIntervalOrd = intervalOrd; if (requestedIntervalTracker != null) { - requestedIntervalTracker.freeze(); + if (skipBlock != null && intervalOrd == previousIntervalOrd) { + // Same interval as the previous doc, so replay its frozen rollup instead of rebuilding. 
+ requestedIntervalTracker.rewind(); + } else { + requestedIntervalTracker.clear(); + maybeRollUp(requestedIntervalTracker); + requestedIntervalTracker.freeze(); + previousIntervalOrd = intervalOrd; + } } return true; } - // Returns the value of the interval v belongs or lastIntervalSeen - // if no processing is done, it returns the lastIntervalSeen private int processValue(long v) { - int lo = 0, hi = boundaries.length - 1; - - int lowerBound = lo; - - while (true) { - int mid = (lo + hi) >>> 1; - if (v <= boundaries[mid]) { - if (mid == lowerBound) { - return mid; - } else { - hi = mid - 1; - } - } else if (v > boundaries[mid + 1]) { - lo = mid + 1; - } else { - return mid + 1; - } - } + return toElementaryInterval(boundaries, v, 0); } void maybeRollUp(IntervalTracker rollUpInto) {} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java index 3d657a96570d..291531014656 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java @@ -22,7 +22,9 @@ import org.apache.lucene.facet.MultiLongValues; import org.apache.lucene.facet.MultiLongValuesSource; import org.apache.lucene.facet.range.LongRange; +import org.apache.lucene.index.DocValuesSkipper; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter; import org.apache.lucene.search.LongValues; import org.apache.lucene.search.LongValuesSource; @@ -32,8 +34,9 @@ class NonOverlappingLongRangeFacetCutter extends LongRangeFacetCutter { NonOverlappingLongRangeFacetCutter( MultiLongValuesSource longValuesSource, LongValuesSource singleLongValuesSource, - 
LongRange[] longRanges) { - super(longValuesSource, singleLongValuesSource, longRanges); + LongRange[] longRanges, + String fieldName) { + super(longValuesSource, singleLongValuesSource, longRanges, fieldName); } /** @@ -68,6 +71,19 @@ List buildElementaryIntervals() { @Override public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException { + if (fieldName != null) { + DocValuesSkipper skipper = context.reader().getDocValuesSkipper(fieldName); + if (skipper != null) { + NumericDocValues singletonValues = singletonFieldValues(context); + if (singletonValues != null) { + return new NonOverlappingLongRangeSingleValueLeafFacetCutter( + asLongValues(singletonValues), boundaries, pos, skipper); + } + MultiLongValues values = valuesSource.getValues(context); + return new NonOverlappingLongRangeMultiValueLeafFacetCutter( + values, boundaries, pos, skipper); + } + } if (singleValues != null) { LongValues values = singleValues.getValues(context, null); return new NonOverlappingLongRangeSingleValueLeafFacetCutter(values, boundaries, pos); @@ -90,6 +106,11 @@ static class NonOverlappingLongRangeMultiValueLeafFacetCutter super(longValues, boundaries, pos); } + NonOverlappingLongRangeMultiValueLeafFacetCutter( + MultiLongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) { + super(longValues, boundaries, pos, skipper); + } + @Override public int nextOrd() throws IOException { while (true) { @@ -112,6 +133,11 @@ static class NonOverlappingLongRangeSingleValueLeafFacetCutter super(longValues, boundaries, pos); } + NonOverlappingLongRangeSingleValueLeafFacetCutter( + LongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) { + super(longValues, boundaries, pos, skipper); + } + @Override public int nextOrd() throws IOException { if (elementaryIntervalOrd == NO_MORE_ORDS) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java 
b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java index 58586db892f7..d168129cbdcd 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java @@ -25,7 +25,9 @@ import org.apache.lucene.facet.MultiLongValues; import org.apache.lucene.facet.MultiLongValuesSource; import org.apache.lucene.facet.range.LongRange; +import org.apache.lucene.index.DocValuesSkipper; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.internal.hppc.IntCursor; import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter; import org.apache.lucene.search.LongValues; @@ -43,8 +45,9 @@ class OverlappingLongRangeFacetCutter extends LongRangeFacetCutter { OverlappingLongRangeFacetCutter( MultiLongValuesSource longValuesSource, LongValuesSource singleLongValuesSource, - LongRange[] longRanges) { - super(longValuesSource, singleLongValuesSource, longRanges); + LongRange[] longRanges, + String fieldName) { + super(longValuesSource, singleLongValuesSource, longRanges, fieldName); // Build binary tree on top of intervals: root = split(0, elementaryIntervals.size(), elementaryIntervals); @@ -147,6 +150,19 @@ private static LongRangeNode split(int start, int end, List elem @Override public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException { + if (fieldName != null) { + DocValuesSkipper skipper = context.reader().getDocValuesSkipper(fieldName); + if (skipper != null) { + NumericDocValues singletonValues = singletonFieldValues(context); + if (singletonValues != null) { + return new OverlappingSingleValuedRangeLeafFacetCutter( + asLongValues(singletonValues), boundaries, pos, requestedRangeCount, root, skipper); + } + MultiLongValues values = valuesSource.getValues(context); 
+ return new OverlappingMultivaluedRangeLeafFacetCutter( + values, boundaries, pos, requestedRangeCount, root, skipper); + } + } if (singleValues != null) { LongValues values = singleValues.getValues(context, null); return new OverlappingSingleValuedRangeLeafFacetCutter( @@ -181,6 +197,18 @@ static class OverlappingMultivaluedRangeLeafFacetCutter this.elementaryIntervalRoot = elementaryIntervalRoot; } + OverlappingMultivaluedRangeLeafFacetCutter( + MultiLongValues longValues, + long[] boundaries, + int[] pos, + int requestedRangeCount, + LongRangeNode elementaryIntervalRoot, + DocValuesSkipper skipper) { + super(longValues, boundaries, pos, skipper); + requestedIntervalTracker = new IntervalTracker.MultiIntervalTracker(requestedRangeCount); + this.elementaryIntervalRoot = elementaryIntervalRoot; + } + @Override void maybeRollUp(IntervalTracker rollUpInto) { elementaryIntervalUpto = 0; @@ -233,6 +261,18 @@ static class OverlappingSingleValuedRangeLeafFacetCutter this.elementaryIntervalRoot = elementaryIntervalRoot; } + OverlappingSingleValuedRangeLeafFacetCutter( + LongValues longValues, + long[] boundaries, + int[] pos, + int requestedRangeCount, + LongRangeNode elementaryIntervalRoot, + DocValuesSkipper skipper) { + super(longValues, boundaries, pos, skipper); + requestedIntervalTracker = new IntervalTracker.MultiIntervalTracker(requestedRangeCount); + this.elementaryIntervalRoot = elementaryIntervalRoot; + } + @Override void maybeRollUp(IntervalTracker rollUpInto) { // TODO: for single valued we can rollup after collecting all documents, e.g. 
in reduce diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java index 8d69acfdc336..05ab7a315c55 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java @@ -35,7 +35,9 @@ private RangeFacetBuilderFactory() {} /** Request long range facets for numeric field by name. */ public static CommonFacetBuilder forLongRanges(String field, LongRange... ranges) { - return forLongRanges(field, MultiLongValuesSource.fromLongField(field), ranges); + return new CommonFacetBuilder( + field, LongRangeFacetCutter.create(field, ranges), new RangeOrdToLabel(ranges)) + .withSortByOrdinal(); } /** diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java index 739f77fe37c5..a9c553861b38 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java @@ -21,6 +21,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import java.io.IOException; import java.util.List; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.DoublePoint; @@ -57,6 +58,8 @@ import org.apache.lucene.search.LongValuesSource; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollectorManager; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import 
org.apache.lucene.tests.search.DummyTotalHitCountCollector; @@ -893,6 +896,308 @@ public void testRandomLongsSingleValued() throws Exception { IOUtils.close(r, dir); } + public void testSkipIndexEquivalenceLong() throws Exception { + // mode 1 sorts by the field and mode 2 also shrinks the skip interval, so the blocks are dense + // enough for the fast path to fire. + for (int mode = 0; mode < 3; mode++) { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + if (mode >= 1) { + iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG))); + } + if (mode == 2) { + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + } + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long v = TestUtil.nextLong(random(), -100, 100); + doc.add(NumericDocValuesField.indexedField("field", v)); + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "mode=" + mode); + + w.close(); + IOUtils.close(dir); + } + } + + public void testSkipIndexEquivalenceExtremeValues() throws Exception { + // Index sorted with extreme values mixed in, so some skip blocks carry Long.MIN/MAX_VALUE as + // their min/max bounds and SkipBlock.advance's interval lookup is exercised on those bounds. 
+ Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG))); + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long v = + switch (random().nextInt(4)) { + case 0 -> Long.MIN_VALUE; + case 1 -> Long.MAX_VALUE; + default -> TestUtil.nextLong(random(), -100, 100); + }; + doc.add(NumericDocValuesField.indexedField("field", v)); + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "extreme"); + + w.close(); + IOUtils.close(dir); + } + + public void testSkipIndexEquivalenceSparse() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG, false))); + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + // Leave roughly a third of the docs without a value so skip blocks aren't dense. 
+ if (random().nextInt(3) != 0) { + doc.add( + NumericDocValuesField.indexedField("field", TestUtil.nextLong(random(), -100, 100))); + } + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "sparse"); + + w.close(); + IOUtils.close(dir); + } + + public void testSkipIndexEquivalenceMultiValued() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + int numVals = TestUtil.nextInt(random(), 1, 5); + for (int j = 0; j < numVals; j++) { + doc.add(new SortedNumericDocValuesField("field", TestUtil.nextLong(random(), -100, 100))); + } + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "multi-valued"); + + w.close(); + IOUtils.close(dir); + } + + public void testSkipIndexEquivalenceMultiValuedFewValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + int numVals = TestUtil.nextInt(random(), 1, 3); + for (int j = 0; j < numVals; j++) { + doc.add( + SortedNumericDocValuesField.indexedField("field", TestUtil.nextLong(random(), 0, 5))); + } + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "multi-valued-few-values"); + + w.close(); + IOUtils.close(dir); + } + + public void testSkipIndexEquivalenceMultiValuedExtremeValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + int numVals = 
TestUtil.nextInt(random(), 1, 3); + for (int j = 0; j < numVals; j++) { + long v = + switch (random().nextInt(4)) { + case 0 -> Long.MIN_VALUE; + case 1 -> Long.MAX_VALUE; + default -> TestUtil.nextLong(random(), -5, 5); + }; + doc.add(SortedNumericDocValuesField.indexedField("field", v)); + } + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "multi-valued-extreme"); + + w.close(); + IOUtils.close(dir); + } + + public void testSkipIndexEquivalenceMultiValuedSparse() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + // Leave roughly a third of the docs without a value so skip blocks aren't dense. + if (random().nextInt(3) != 0) { + int numVals = TestUtil.nextInt(random(), 1, 3); + for (int j = 0; j < numVals; j++) { + doc.add( + SortedNumericDocValuesField.indexedField("field", TestUtil.nextLong(random(), 0, 5))); + } + } + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "multi-valued-sparse"); + + w.close(); + IOUtils.close(dir); + } + + public void testSkipIndexEquivalenceLongRunsDefaultInterval() throws Exception { + for (boolean multiValued : new boolean[] {false, true}) { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + // A multi-valued SORTED_NUMERIC field can't be index-sorted with a plain LONG SortField. + if (multiValued == false) { + iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG))); + } + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(20000); + // Few distinct values with long runs => wide same-interval, dense skip spans across levels. 
+ for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long v = (i / 5000) % 4; + if (multiValued) { + doc.add(SortedNumericDocValuesField.indexedField("field", v)); + doc.add(SortedNumericDocValuesField.indexedField("field", v)); + } else { + doc.add(NumericDocValuesField.indexedField("field", v)); + } + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "multi-level-dense mv=" + multiValued); + + w.close(); + IOUtils.close(dir); + } + } + + public void testSkipIndexEquivalenceFewValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG, false))); + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(NumericDocValuesField.indexedField("field", TestUtil.nextLong(random(), 0, 5))); + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "few-values"); + + w.close(); + IOUtils.close(dir); + } + + public void testByNameNoSkipIndexEquivalence() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("field", TestUtil.nextLong(random(), -100, 100))); + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "by-name-no-skip-index"); + + w.close(); + IOUtils.close(dir); + } + + private void assertSkipIndexEquivalence(RandomIndexWriter w, String desc) throws IOException { + IndexReader r = w.getReader(); + try { + IndexSearcher s = newSearcher(r, false); + + int numIters = atLeast(10); + for (int iter = 0; iter < numIters; iter++) { + int numRange = TestUtil.nextInt(random(), 0, 20); + LongRange[] ranges = new 
LongRange[numRange]; + for (int rangeID = 0; rangeID < numRange; rangeID++) { + long min; + long max; + if (random().nextInt(20) == 0) { + // Occasionally use extreme bounds to exercise the boundary edges of processValue. + min = random().nextBoolean() ? Long.MIN_VALUE : TestUtil.nextLong(random(), -120, 120); + max = random().nextBoolean() ? Long.MAX_VALUE : TestUtil.nextLong(random(), -120, 120); + } else { + min = TestUtil.nextLong(random(), -120, 120); + max = TestUtil.nextLong(random(), -120, 120); + } + if (min > max) { + long x = min; + min = max; + max = x; + } + ranges[rangeID] = new LongRange("r" + rangeID, min, true, max, true); + } + OrdToLabel ordToLabel = new RangeOrdToLabel(ranges); + + // value-source path, no skipper. + CountFacetRecorder baselineRecorder = new CountFacetRecorder(); + s.search( + MatchAllDocsQuery.INSTANCE, + new FacetFieldCollectorManager<>( + LongRangeFacetCutter.create(MultiLongValuesSource.fromLongField("field"), ranges), + baselineRecorder)); + String baseline = + getAllSortByOrd(getRangeOrdinals(ranges), baselineRecorder, "field", ordToLabel) + .toString(); + + // by-field cutter, uses the skip index. + CountFacetRecorder skipRecorder = new CountFacetRecorder(); + s.search( + MatchAllDocsQuery.INSTANCE, + new FacetFieldCollectorManager<>( + LongRangeFacetCutter.create("field", ranges), skipRecorder)); + String withSkip = + getAllSortByOrd(getRangeOrdinals(ranges), skipRecorder, "field", ordToLabel).toString(); + + assertEquals(desc + " iter=" + iter, baseline, withSkip); + } + } finally { + IOUtils.close(r); + } + } + public void testRandomLongsMultiValued() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir);