Skip to content

Commit

Permalink
Reduce frequency of logging messages.
Browse files Browse the repository at this point in the history
Log text extraction cache statistics only once.
  • Loading branch information
nfsantos committed Sep 18, 2024
1 parent 83201ad commit 8e33ba1
Show file tree
Hide file tree
Showing 11 changed files with 35 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.jackrabbit.guava.common.base.Stopwatch;
import org.apache.jackrabbit.guava.common.io.Closer;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.cache.CacheStats;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.commons.concurrent.ExecutorCloser;
import org.apache.jackrabbit.oak.index.IndexHelper;
Expand All @@ -49,6 +50,7 @@
import org.apache.jackrabbit.oak.plugins.index.IndexingReporter;
import org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter;
import org.apache.jackrabbit.oak.plugins.index.progress.MetricRateEstimator;
import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
import org.apache.jackrabbit.oak.plugins.metric.MetricStatisticsProvider;
Expand Down Expand Up @@ -397,6 +399,12 @@ public void reindex() throws CommitFailedException, IOException {
}
}
log.info("Top slowest nodes to index (ms): {}", slowestTopKElements);

indexerProviders.forEach(indexProvider -> {
ExtractedTextCache extractedTextCache = indexProvider.getTextCache();
CacheStats cacheStats = extractedTextCache == null ? null : extractedTextCache.getCacheStats();
log.info("Text extraction cache statistics: {}", cacheStats == null ? "N/A" : cacheStats.cacheInfoAsString());
});
}

progressReporter.reindexingTraversalEnd();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.io.Closeable;

import org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter;
import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.jetbrains.annotations.NotNull;
Expand All @@ -34,4 +35,6 @@ NodeStateIndexer getIndexer(@NotNull String type,
@NotNull String indexPath,
@NotNull NodeBuilder definition,
@NotNull NodeState root, IndexingProgressReporter progressReporter);

ExtractedTextCache getTextCache();
}
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ public void run() {
try (LineIterator ffsLineIterator = new LineIterator(IndexStoreUtils.createReader(ffsPath, algorithm))) {
String oldName = Thread.currentThread().getName();
Thread.currentThread().setName("scanner");
LOG.info("Starting scanning FFS for blobs to download, matching suffix: {}", binaryBlobsPathSuffix);
try {
while (ffsLineIterator.hasNext()) {
String ffsLine = ffsLineIterator.next();
Expand All @@ -247,7 +248,7 @@ public void run() {
processEntry(entryPath, nodeState);
}
linesScanned++;
if (linesScanned % 100_000 == 0) {
if (linesScanned % 1_000_000 == 0) {
LOG.info("[{}] Last path scanned: {}. Aggregated statistics: {}", linesScanned, entryPath, formatAggregateStatistics());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public boolean shouldSkip(String fieldName, String idOrPathValue) {
if (fieldName.equals(NodeDocument.PATH)) {
longPathSkipped.incrementAndGet();
}
if (skippedSoFar % 50_000 == 0) {
if (skippedSoFar % 200_000 == 0) {
LOG.info("skippedSoFar: {}. Long path: {}, Doc: {}={}", skippedSoFar, longPathSkipped.get(), fieldName, idOrPathValue);
}
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public void printStatistics(String heading) {
long threadUserTimeMillis = threadMXBean.getThreadUserTime(thread.getId()) / 1_000_000;
double threadCpuTimePercentage = FormattingUtils.safeComputePercentage(threadCpuTimeMillis, timeSinceStartMillis);
double threadUserTimePercentage = FormattingUtils.safeComputePercentage(threadUserTimeMillis, timeSinceStartMillis);
sb.append(String.format(" Thread %-26s - cpuTime: %6d (%.2f%%), userTime: %6d (%.2f%%)\n",
sb.append(String.format(" Thread %-26s - cpuTime: %7d (%.2f%%), userTime: %7d (%.2f%%)\n",
thread.getName() + "/" + thread.getId(),
threadCpuTimeMillis, threadCpuTimePercentage,
threadUserTimeMillis, threadUserTimePercentage)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
*/
package org.apache.jackrabbit.oak.index;

import org.apache.jackrabbit.guava.common.collect.ImmutableList;
import org.apache.jackrabbit.oak.index.indexer.document.*;
import org.apache.jackrabbit.oak.index.indexer.document.DocumentStoreIndexerBase;
import org.apache.jackrabbit.oak.index.indexer.document.ElasticIndexer;
import org.apache.jackrabbit.oak.index.indexer.document.ElasticIndexerProvider;
import org.apache.jackrabbit.oak.index.indexer.document.NodeStateIndexer;
import org.apache.jackrabbit.oak.index.indexer.document.NodeStateIndexerProvider;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection;

import java.io.IOException;
Expand Down Expand Up @@ -53,7 +56,7 @@ public ElasticDocumentStoreIndexer(IndexHelper indexHelper, IndexerSupport index
}

protected List<NodeStateIndexerProvider> createProviders() {
List<NodeStateIndexerProvider> providers = ImmutableList.of(
List<NodeStateIndexerProvider> providers = List.of(
createElasticIndexerProvider()
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ public ElasticIndexerProvider(IndexHelper indexHelper, ElasticConnection connect
return new ElasticIndexer(idxDefinition, textExtractor, definition, progressReporter, indexWriter, elasticIndexEditorProvider, indexHelper);
}

@Override
public @Nullable ExtractedTextCache getTextCache() {
return textCache;
}

@Override
public void close() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
*/
package org.apache.jackrabbit.oak.index.indexer.document;

import org.apache.jackrabbit.guava.common.collect.ImmutableList;
import org.apache.jackrabbit.oak.index.ExtendedIndexHelper;
import org.apache.jackrabbit.oak.index.IndexerSupport;

Expand All @@ -41,7 +40,7 @@ private NodeStateIndexerProvider createLuceneIndexProvider() throws IOException
}

protected List<NodeStateIndexerProvider> createProviders() throws IOException {
List<NodeStateIndexerProvider> providers = ImmutableList.of(
List<NodeStateIndexerProvider> providers = List.of(
createLuceneIndexProvider()
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE;
Expand All @@ -45,12 +46,11 @@ public class LuceneIndexerProvider implements NodeStateIndexerProvider {
private final ExtractedTextCache textCache =
new ExtractedTextCache(FileUtils.ONE_MB * 5, TimeUnit.HOURS.toSeconds(5));
private final ExtendedIndexHelper extendedIndexHelper;
private final DirectoryFactory dirFactory;
private final LuceneIndexWriterFactory indexWriterFactory;

public LuceneIndexerProvider(ExtendedIndexHelper extendedIndexHelper, IndexerSupport indexerSupport) throws IOException {
this.extendedIndexHelper = extendedIndexHelper;
this.dirFactory = new FSDirectoryFactory(indexerSupport.getLocalIndexDir());
DirectoryFactory dirFactory = new FSDirectoryFactory(indexerSupport.getLocalIndexDir());
this.indexWriterFactory = new DefaultIndexWriterFactory(extendedIndexHelper.getMountInfoProvider(),
dirFactory, extendedIndexHelper.getLuceneIndexHelper().getWriterConfigForReindex());
}
Expand All @@ -76,6 +76,11 @@ public NodeStateIndexer getIndexer(@NotNull String type, @NotNull String indexPa
);
}

@Override
public @Nullable ExtractedTextCache getTextCache() {
return textCache;
}

@Override
public void close() throws IOException {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.cache.CacheStats;
import org.apache.jackrabbit.oak.commons.io.LazyInputStream;
import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
Expand Down Expand Up @@ -98,9 +97,6 @@ public void done(boolean reindex){

public void logStats() {
log.info("[{}] Text extraction statistics: {}", getIndexName(), textExtractionStats.formatStats());
CacheStats cacheStats = extractedTextCache.getCacheStats();
log.info("[{}] Text extraction cache statistics: {}",
getIndexName(), cacheStats == null ? "N/A" : cacheStats.cacheInfoAsString());
}

public List<String> newBinary(PropertyState property, NodeState state, String path) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ final class TextExtractionStats {
private long startTimeNanos = System.nanoTime();

public void reset() {
log.info("Resetting statistics");
this.numberOfExtractions = 0;
this.totalBytesRead = 0;
this.totalExtractedTextLength = 0;
Expand Down Expand Up @@ -100,12 +99,11 @@ public String formatStats() {
double timeExtractingPercentage = FormattingUtils.safeComputePercentage(totalExtractionTimeNanos, timeSinceStartNanos);
long avgExtractionTimeMillis = Math.round(FormattingUtils.safeComputeAverage(totalExtractionTimeNanos / 1_000_000, numberOfExtractions));

return String.format("extractions: %d, timeExtracting: %s (%2.1f%%), totalTime: %s, " +
return String.format("extractions: %d, timeExtracting: %s (%2.1f%%), " +
"avgExtractionTime: %s ms, bytesRead: %s, extractedTextSize: %s",
numberOfExtractions,
FormattingUtils.formatNanosToSeconds(totalExtractionTimeNanos),
timeExtractingPercentage,
FormattingUtils.formatNanosToSeconds(timeSinceStartNanos),
avgExtractionTimeMillis,
humanReadableByteCount(totalBytesRead), humanReadableByteCount(totalExtractedTextLength));
}
Expand Down

0 comments on commit 8e33ba1

Please sign in to comment.