[CELEBORN-83][FOLLOWUP] Fix various bugs when using HDFS as storage. (#1065)

2022-12-15 15:20:29 +08:00 · 2022-12-15 15:20:29 +08:00 · 65cb36c002
commit 65cb36c002
parent df5ed8ec09
14 changed files with 177 additions and 78 deletions
--- a/README.md
+++ b/README.md
@ -121,6 +121,7 @@ celeborn.ha.master.node.3.ratis.port 9874
 celeborn.ha.master.ratis.raft.server.storage.dir /mnt/disk1/rss_ratis/
 celeborn.metrics.enabled true
 # If you want to use HDFS as shuffle storage, make sure that flush buffer size is at least 4MB or larger.
 celeborn.worker.flush.buffer.size 256k
 celeborn.worker.storage.dirs /mnt/disk1/,/mnt/disk2
 # If your hosts have disk raid or use lvm, set celeborn.worker.monitor.disk.enabled to false
--- a/client/src/main/java/org/apache/celeborn/client/ShuffleClient.java
+++ b/client/src/main/java/org/apache/celeborn/client/ShuffleClient.java
@ -24,6 +24,8 @@ import java.util.function.BooleanSupplier;
 import io.netty.buffer.ByteBuf;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.celeborn.client.read.RssInputStream;
 import org.apache.celeborn.common.CelebornConf;
@ -39,6 +41,7 @@ public abstract class ShuffleClient {
  private static volatile ShuffleClient _instance;
  private static volatile boolean initialized = false;
  private static volatile FileSystem hdfsFs;
  private static Logger logger = LoggerFactory.getLogger(ShuffleClient.class);
  // for testing
  public static void reset() {
@ -102,6 +105,11 @@ public abstract class ShuffleClient {
      synchronized (ShuffleClient.class) {
        if (null == hdfsFs) {
          Configuration hdfsConfiguration = new Configuration();
          // enable fs cache to avoid too many fs instances
          hdfsConfiguration.set("fs.hdfs.impl.disable.cache", "false");
          logger.info(
              "Celeborn client will ignore cluster"
                  + " settings about fs.hdfs.impl.disable.cache and set it to false");
          try {
            hdfsFs = FileSystem.get(hdfsConfiguration);
          } catch (IOException e) {
--- a/client/src/main/java/org/apache/celeborn/client/read/DfsPartitionReader.java
+++ b/client/src/main/java/org/apache/celeborn/client/read/DfsPartitionReader.java
@ -23,7 +23,6 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 import io.netty.buffer.ByteBuf;
@ -53,9 +52,10 @@ public class DfsPartitionReader implements PartitionReader {
  private final AtomicReference<IOException> exception = new AtomicReference<>();
  private volatile boolean closed = false;
  private Thread fetchThread;
-  private final FSDataInputStream hdfsInputStream;
+  private FSDataInputStream hdfsInputStream;
  private int numChunks = 0;
-  private final AtomicInteger currentChunkIndex = new AtomicInteger(0);
+  private int returnedChunks = 0;
  private int currentChunkIndex = 0;
  public DfsPartitionReader(
      CelebornConf conf,
@ -98,29 +98,67 @@ public class DfsPartitionReader implements PartitionReader {
          ShuffleClient.getHdfsFs(conf).open(new Path(location.getStorageInfo().getFilePath()));
      chunkOffsets.addAll(getChunkOffsetsFromUnsortedIndex(conf, location));
    }
    logger.debug(
        "DFS {} index count:{} offsets:{}",
        location.getStorageInfo().getFilePath(),
        chunkOffsets.size(),
        chunkOffsets);
    if (chunkOffsets.size() > 1) {
      numChunks = chunkOffsets.size() - 1;
      fetchThread =
          new Thread(
              () -> {
                try {
-                  while (!closed && currentChunkIndex.get() < numChunks) {
+                  while (!closed && currentChunkIndex < numChunks) {
                    while (results.size() >= fetchMaxReqsInFlight) {
                      Thread.sleep(50);
                    }
-                    long offset = chunkOffsets.get(currentChunkIndex.get());
+                    long offset = chunkOffsets.get(currentChunkIndex);
-                    long length = chunkOffsets.get(currentChunkIndex.get() + 1) - offset;
+                    long length = chunkOffsets.get(currentChunkIndex + 1) - offset;
                    logger.debug("read {} offset {} length {}", currentChunkIndex, offset, length);
                    byte[] buffer = new byte[(int) length];
-                    hdfsInputStream.readFully(offset, buffer);
+                    try {
-                    results.add(Unpooled.wrappedBuffer(buffer));
+                      hdfsInputStream.readFully(offset, buffer);
-                    currentChunkIndex.incrementAndGet();
+                    } catch (IOException e) {
                      logger.warn(
                          "read hdfs {} failed will retry, error detail {}",
                          location.getStorageInfo().getFilePath(),
                          e);
                      try {
                        hdfsInputStream.close();
                        hdfsInputStream =
                            ShuffleClient.getHdfsFs(conf)
                                .open(
                                    new Path(
                                        Utils.getSortedFilePath(
                                            location.getStorageInfo().getFilePath())));
                        hdfsInputStream.readFully(offset, buffer);
                      } catch (IOException ex) {
                        logger.warn(
                            "retry read hdfs {} failed, error detail {} ",
                            location.getStorageInfo().getFilePath(),
                            e);
                        exception.set(ex);
                        break;
                      }
                    }
                    results.put(Unpooled.wrappedBuffer(buffer));
                    logger.debug("add index {} to results", currentChunkIndex++);
                  }
-                } catch (IOException e) {
+                } catch (Exception e) {
-                  exception.set(e);
+                  logger.warn("Fetch thread is cancelled.", e);
                } catch (InterruptedException e) {
                  // cancel a task for speculative, ignore this exception
                }
-              });
+                logger.debug("fetch {} is done.", location.getStorageInfo().getFilePath());
              },
              "Dfs-fetch-thread" + location.getStorageInfo().getFilePath());
      fetchThread.setUncaughtExceptionHandler(
          new Thread.UncaughtExceptionHandler() {
            @Override
            public void uncaughtException(Thread t, Throwable e) {
              logger.error("thread {} failed with exception {}", t, e);
            }
          });
      fetchThread.start();
      logger.debug("Start dfs read on location {}", location);
    }
@ -145,6 +183,7 @@ public class DfsPartitionReader implements PartitionReader {
      throws IOException {
    String indexPath = Utils.getIndexFilePath(location.getStorageInfo().getFilePath());
    FSDataInputStream indexInputStream = ShuffleClient.getHdfsFs(conf).open(new Path(indexPath));
    logger.debug("read sorted index {}", indexPath);
    long indexSize = ShuffleClient.getHdfsFs(conf).getFileStatus(new Path(indexPath)).getLen();
    // Index size won't be large, so it's safe to do the conversion.
    byte[] indexBuffer = new byte[(int) indexSize];
@ -162,17 +201,18 @@ public class DfsPartitionReader implements PartitionReader {
  @Override
  public boolean hasNext() {
-    return currentChunkIndex.get() < numChunks;
+    logger.debug("check has next current index: {} chunks {}", returnedChunks, numChunks);
    return returnedChunks < numChunks;
  }
  @Override
  public ByteBuf next() throws IOException {
    checkException();
    ByteBuf chunk = null;
    try {
      while (chunk == null) {
        checkException();
        chunk = results.poll(500, TimeUnit.MILLISECONDS);
        logger.debug("poll result with result size: {}", results.size());
      }
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
@ -180,6 +220,7 @@ public class DfsPartitionReader implements PartitionReader {
      exception.set(ioe);
      throw ioe;
    }
    returnedChunks++;
    return chunk;
  }
@ -193,7 +234,9 @@ public class DfsPartitionReader implements PartitionReader {
  @Override
  public void close() {
    closed = true;
-    fetchThread.interrupt();
+    if (fetchThread != null) {
      fetchThread.interrupt();
    }
    try {
      hdfsInputStream.close();
    } catch (IOException e) {
--- a/codecov.yml
+++ b/codecov.yml
@ -21,6 +21,8 @@ coverage:
  status:
    project:
      default:
-        target: 0%
+        target: 10%
-        threshold: 0%
+    patch:
-        base: auto
+      default:
        enabled: no
        if_not_found: success
--- a/common/src/main/java/org/apache/celeborn/common/meta/FileInfo.java
+++ b/common/src/main/java/org/apache/celeborn/common/meta/FileInfo.java
@ -18,7 +18,6 @@
 package org.apache.celeborn.common.meta;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@ -27,12 +26,15 @@ import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.celeborn.common.identity.UserIdentifier;
 import org.apache.celeborn.common.protocol.PartitionType;
 import org.apache.celeborn.common.util.Utils;
 public class FileInfo {
  private static Logger logger = LoggerFactory.getLogger(FileInfo.class);
  private final String filePath;
  private final List<Long> chunkOffsets;
  private final UserIdentifier userIdentifier;
@ -127,12 +129,23 @@ public class FileInfo {
    return userIdentifier;
  }
-  public void deleteAllFiles(FileSystem hdfsFs) throws IOException {
+  public void deleteAllFiles(FileSystem hdfsFs) {
    if (isHdfs()) {
-      hdfsFs.delete(getHdfsPath(), false);
+      try {
-      hdfsFs.delete(getHdfsWriterSuccessPath(), false);
+        hdfsFs.delete(getHdfsPath(), false);
-      hdfsFs.delete(getHdfsIndexPath(), false);
+        hdfsFs.delete(getHdfsWriterSuccessPath(), false);
-      hdfsFs.delete(getHdfsSortedPath(), false);
+        hdfsFs.delete(getHdfsIndexPath(), false);
        hdfsFs.delete(getHdfsSortedPath(), false);
      } catch (Exception e) {
        // ignore delete exceptions because some other workers might be deleting the directory
        logger.debug(
            "delete hdfs file {},{},{},{} failed {}",
            getHdfsPath(),
            getHdfsWriterSuccessPath(),
            getHdfsIndexPath(),
            getHdfsSortedPath(),
            e);
      }
    } else {
      getFile().delete();
      new File(getIndexPath()).delete();
--- a/conf/log4j2.xml.template
+++ b/conf/log4j2.xml.template
@ -33,9 +33,13 @@
            <PatternLayout pattern="%d{yy/MM/dd HH:mm:ss,SSS} %p [%t] %c{1}: %m%n%ex"/>
        </Console>
    </Appenders>
    <Loggers>
        <Root level="INFO">
            <AppenderRef ref="stdout"/>
        </Root>
        <Logger name="org.apache.hadoop.hdfs" level="WARN" additivity="false">
            <Appender-ref ref="stdout" level="WARN" />
        </Logger>
    </Loggers>
 </Configuration>
--- a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/FileWriter.java
+++ b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/FileWriter.java
@ -27,7 +27,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 import io.netty.buffer.ByteBuf;
 import io.netty.buffer.CompositeByteBuf;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.roaringbitmap.RoaringBitmap;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -53,10 +52,9 @@ public abstract class FileWriter implements DeviceObserver {
  private static final long WAIT_INTERVAL_MS = 20;
  protected final FileInfo fileInfo;
-  protected FileChannel channel;
+  private FileChannel channel;
-  protected FSDataOutputStream stream;
+  private volatile boolean closed;
-  protected volatile boolean closed;
+  private volatile boolean destroyed;
  protected volatile boolean destroyed;
  protected final AtomicInteger numPendingWrites = new AtomicInteger();
  protected long bytesFlushed;
@ -105,7 +103,20 @@ public abstract class FileWriter implements DeviceObserver {
    if (!fileInfo.isHdfs()) {
      channel = new FileOutputStream(fileInfo.getFilePath()).getChannel();
    } else {
-      stream = StorageManager.hdfsFs().create(fileInfo.getHdfsPath(), true);
+      // We open the stream and close immediately because HDFS output stream will
      // create a DataStreamer that is a threaed.
      // If we reuse HDFS output stream, we will exhaust the memory soon.
      try {
        StorageManager.hadoopFs().create(fileInfo.getHdfsPath(), true).close();
      } catch (IOException e) {
        try {
          // If create file failed, wait 10 ms and retry
          Thread.sleep(10);
        } catch (InterruptedException ex) {
          throw new RuntimeException(ex);
        }
        StorageManager.hadoopFs().create(fileInfo.getHdfsPath(), true).close();
      }
    }
    source = workerSource;
    logger.debug("FileWriter {} split threshold {} mode {}", this, splitThreshold, splitMode);
@ -138,8 +149,8 @@ public abstract class FileWriter implements DeviceObserver {
    FlushTask task = null;
    if (channel != null) {
      task = new LocalFlushTask(flushBuffer, channel, notifier);
-    } else if (stream != null) {
+    } else if (fileInfo.isHdfs()) {
-      task = new HdfsFlushTask(flushBuffer, stream, notifier);
+      task = new HdfsFlushTask(flushBuffer, fileInfo.getHdfsPath(), notifier);
    }
    addTask(task);
    flushBuffer = null;
@ -248,8 +259,7 @@ public abstract class FileWriter implements DeviceObserver {
        if (channel != null) {
          channel.close();
        }
-        if (stream != null) {
+        if (fileInfo.isHdfs()) {
          stream.close();
          streamClose.run();
        }
      } catch (IOException e) {
@ -278,9 +288,6 @@ public abstract class FileWriter implements DeviceObserver {
        if (channel != null) {
          channel.close();
        }
        if (stream != null) {
          stream.close();
        }
      } catch (IOException e) {
        logger.warn(
            "Close channel failed for file {} caused by {}.",
@ -291,14 +298,12 @@ public abstract class FileWriter implements DeviceObserver {
    if (!destroyed) {
      destroyed = true;
-      try {
+      fileInfo.deleteAllFiles(StorageManager.hadoopFs());
        fileInfo.deleteAllFiles(StorageManager.hdfsFs());
      } catch (Exception e) {
        logger.warn("Exception when cleaning hdfs file {}", fileInfo.getFilePath());
      }
      // unregister from DeviceMonitor
-      deviceMonitor.unregisterFileWriter(this);
+      if (!fileInfo.isHdfs()) {
        deviceMonitor.unregisterFileWriter(this);
      }
      destroyHook.run();
    }
  }
--- a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/MapPartitionFileWriter.java
+++ b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/MapPartitionFileWriter.java
@ -75,7 +75,7 @@ public final class MapPartitionFileWriter extends FileWriter {
    if (!fileInfo.isHdfs()) {
      channelIndex = new FileOutputStream(fileInfo.getIndexPath()).getChannel();
    } else {
-      streamIndex = StorageManager.hdfsFs().create(fileInfo.getHdfsIndexPath(), true);
+      streamIndex = StorageManager.hadoopFs().create(fileInfo.getHdfsIndexPath(), true);
    }
  }
--- a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java
+++ b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java
@ -342,7 +342,7 @@ public class PartitionFilesSorter extends ShuffleRecoverHelper {
    if (isHdfs) {
      // If the index file exists, it will be overwritten.
      // So there is no need to check its existence.
-      hdfsIndexOutput = StorageManager.hdfsFs().create(new Path(indexFilePath));
+      hdfsIndexOutput = StorageManager.hadoopFs().create(new Path(indexFilePath));
    } else {
      indexFileChannel = new FileOutputStream(indexFilePath).getChannel();
    }
@ -455,8 +455,9 @@ public class PartitionFilesSorter extends ShuffleRecoverHelper {
      int indexSize = 0;
      try {
        if (isHdfs) {
-          hdfsIndexStream = StorageManager.hdfsFs().open(new Path(indexFilePath));
+          hdfsIndexStream = StorageManager.hadoopFs().open(new Path(indexFilePath));
-          indexSize = (int) StorageManager.hdfsFs().getFileStatus(new Path(indexFilePath)).getLen();
+          indexSize =
              (int) StorageManager.hadoopFs().getFileStatus(new Path(indexFilePath)).getLen();
        } else {
          indexStream = new FileInputStream(indexFilePath);
          File indexFile = new File(indexFilePath);
@ -520,11 +521,11 @@ public class PartitionFilesSorter extends ShuffleRecoverHelper {
          indexFile.delete();
        }
      } else {
-        if (StorageManager.hdfsFs().exists(fileInfo.getHdfsSortedPath())) {
+        if (StorageManager.hadoopFs().exists(fileInfo.getHdfsSortedPath())) {
-          StorageManager.hdfsFs().delete(fileInfo.getHdfsSortedPath(), false);
+          StorageManager.hadoopFs().delete(fileInfo.getHdfsSortedPath(), false);
        }
-        if (StorageManager.hdfsFs().exists(fileInfo.getHdfsIndexPath())) {
+        if (StorageManager.hadoopFs().exists(fileInfo.getHdfsIndexPath())) {
-          StorageManager.hdfsFs().delete(fileInfo.getHdfsIndexPath(), false);
+          StorageManager.hadoopFs().delete(fileInfo.getHdfsIndexPath(), false);
        }
      }
    }
@ -612,8 +613,8 @@ public class PartitionFilesSorter extends ShuffleRecoverHelper {
    private void initializeFiles() throws IOException {
      if (isHdfs) {
-        hdfsOriginInput = StorageManager.hdfsFs().open(new Path(originFilePath));
+        hdfsOriginInput = StorageManager.hadoopFs().open(new Path(originFilePath));
-        hdfsSortedOutput = StorageManager.hdfsFs().create(new Path(sortedFilePath));
+        hdfsSortedOutput = StorageManager.hadoopFs().create(new Path(sortedFilePath));
      } else {
        originFileChannel = new FileInputStream(originFilePath).getChannel();
        sortedFileChannel = new FileOutputStream(sortedFilePath).getChannel();
@ -646,7 +647,7 @@ public class PartitionFilesSorter extends ShuffleRecoverHelper {
    private void deleteOriginFiles() throws IOException {
      boolean deleteSuccess = false;
      if (isHdfs) {
-        deleteSuccess = StorageManager.hdfsFs().delete(new Path(originFilePath), false);
+        deleteSuccess = StorageManager.hadoopFs().delete(new Path(originFilePath), false);
      } else {
        deleteSuccess = new File(originFilePath).delete();
      }
--- a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/ReducePartitionFileWriter.java
+++ b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/ReducePartitionFileWriter.java
@ -94,13 +94,13 @@ public final class ReducePartitionFileWriter extends FileWriter {
          }
        },
        () -> {
-          if (StorageManager.hdfsFs().exists(fileInfo.getHdfsPeerWriterSuccessPath())) {
+          if (StorageManager.hadoopFs().exists(fileInfo.getHdfsPeerWriterSuccessPath())) {
-            StorageManager.hdfsFs().delete(fileInfo.getHdfsPath(), false);
+            StorageManager.hadoopFs().delete(fileInfo.getHdfsPath(), false);
            deleted = true;
          } else {
-            StorageManager.hdfsFs().create(fileInfo.getHdfsWriterSuccessPath()).close();
+            StorageManager.hadoopFs().create(fileInfo.getHdfsWriterSuccessPath()).close();
            FSDataOutputStream indexOutputStream =
-                StorageManager.hdfsFs().create(fileInfo.getHdfsIndexPath());
+                StorageManager.hadoopFs().create(fileInfo.getHdfsIndexPath());
            indexOutputStream.writeInt(fileInfo.getChunkOffsets().size());
            for (Long offset : fileInfo.getChunkOffsets()) {
              indexOutputStream.writeLong(offset);
--- a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/PushDataHandler.scala
+++ b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/PushDataHandler.scala
@ -37,7 +37,7 @@ import org.apache.celeborn.common.protocol.{PartitionLocation, PartitionSplitMod
 import org.apache.celeborn.common.protocol.message.StatusCode
 import org.apache.celeborn.common.unsafe.Platform
 import org.apache.celeborn.common.util.PackedPartitionId
-import org.apache.celeborn.service.deploy.worker.storage.{FileWriter, LocalFlusher, MapPartitionFileWriter, StorageManager}
+import org.apache.celeborn.service.deploy.worker.storage.{FileWriter, HdfsFlusher, LocalFlusher, MapPartitionFileWriter, StorageManager}
 class PushDataHandler extends BaseMessageHandler with Logging {
@ -224,9 +224,14 @@ class PushDataHandler extends BaseMessageHandler with Logging {
      callback.onFailure(new Exception(message, exception))
      return
    }
-    val diskFull = workerInfo.diskInfos
+    val diskFull =
-      .get(fileWriter.flusher.asInstanceOf[LocalFlusher].mountPoint)
+      if (fileWriter.flusher.isInstanceOf[LocalFlusher]) {
-      .actualUsableSpace < diskReserveSize
+        workerInfo.diskInfos
          .get(fileWriter.flusher.asInstanceOf[LocalFlusher].mountPoint)
          .actualUsableSpace < diskReserveSize
      } else {
        false
      }
    if ((diskFull && fileWriter.getFileInfo.getFileLength > partitionSplitMinimumSize) ||
      (isMaster && fileWriter.getFileInfo.getFileLength > fileWriter.getSplitThreshold())) {
      if (fileWriter.getSplitMode == PartitionSplitMode.SOFT) {
@ -859,6 +864,9 @@ class PushDataHandler extends BaseMessageHandler with Logging {
  }
  private def checkDiskFull(fileWriter: FileWriter): Boolean = {
    if (fileWriter.flusher.isInstanceOf[HdfsFlusher]) {
      return false
    }
    val diskFull = workerInfo.diskInfos
      .get(fileWriter.flusher.asInstanceOf[LocalFlusher].mountPoint)
      .actualUsableSpace < diskReserveSize
--- a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/FlushTask.scala
+++ b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/FlushTask.scala
@ -20,7 +20,7 @@ package org.apache.celeborn.service.deploy.worker.storage
 import java.nio.channels.FileChannel
 import io.netty.buffer.{ByteBufUtil, CompositeByteBuf}
-import org.apache.hadoop.fs.FSDataOutputStream
+import org.apache.hadoop.fs.{FSDataOutputStream, Path}
 abstract private[worker] class FlushTask(
    val buffer: CompositeByteBuf,
@ -44,9 +44,11 @@ private[worker] class LocalFlushTask(
 private[worker] class HdfsFlushTask(
    buffer: CompositeByteBuf,
-    fsStream: FSDataOutputStream,
+    val path: Path,
    notifier: FlushNotifier) extends FlushTask(buffer, notifier) {
  override def flush(): Unit = {
-    fsStream.write(ByteBufUtil.getBytes(buffer))
+    val hdfsStream = StorageManager.hadoopFs.append(path)
    hdfsStream.write(ByteBufUtil.getBytes(buffer))
    hdfsStream.close()
  }
 }
--- a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/Flusher.scala
+++ b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/Flusher.scala
@ -223,11 +223,11 @@ final private[worker] class HdfsFlusher(
    hdfsFlusherThreads,
    flushAvgTimeWindowSize,
    avgFlushTimeSlidingWindowMinCount) with Logging {
  override def toString: String = s"HdfsFlusher@$flusherId"
  override def processIOException(e: IOException, deviceErrorType: DiskStatus): Unit = {
    stopAndCleanFlusher()
    logError(s"$this write failed, reason $deviceErrorType ,exception: $e")
  }
 }
--- a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/StorageManager.scala
+++ b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/storage/StorageManager.scala
@ -45,7 +45,7 @@ import org.apache.celeborn.common.protocol.{PartitionLocation, PartitionSplitMod
 import org.apache.celeborn.common.quota.ResourceConsumption
 import org.apache.celeborn.common.util.{PbSerDeUtils, ThreadUtils, Utils}
 import org.apache.celeborn.service.deploy.worker._
-import org.apache.celeborn.service.deploy.worker.storage.StorageManager.hdfsFs
+import org.apache.celeborn.service.deploy.worker.storage.StorageManager.hadoopFs
 final private[worker] class StorageManager(conf: CelebornConf, workerSource: AbstractSource)
  extends ShuffleRecoverHelper with DeviceObserver with Logging with MemoryPressureListener {
@ -129,7 +129,10 @@ final private[worker] class StorageManager(conf: CelebornConf, workerSource: Abs
      val hdfsConfiguration = new Configuration
      hdfsConfiguration.set("fs.defaultFS", hdfsDir)
      hdfsConfiguration.set("dfs.replication", "2")
-      StorageManager.hdfsFs = FileSystem.get(hdfsConfiguration)
+      hdfsConfiguration.set("fs.hdfs.impl.disable.cache", "false")
      logInfo("Celeborn will ignore cluster settings" +
        " about fs.hdfs.impl.disable.cache and set it to false")
      StorageManager.hadoopFs = FileSystem.get(hdfsConfiguration)
      Some(new HdfsFlusher(
        workerSource,
        conf.hdfsFlusherThreads,
@ -270,7 +273,7 @@ final private[worker] class StorageManager(conf: CelebornConf, workerSource: Abs
        if (diskInfo != null && diskInfo.status.equals(DiskStatus.HEALTHY)) {
          diskInfo.dirs
        } else {
-          logWarning(s"Disk unavailable for $suggestedMountPoint, return all healthy" +
+          logDebug(s"Disk unavailable for $suggestedMountPoint, return all healthy" +
            s" working dirs. diskInfo $diskInfo")
          healthyWorkingDirs()
        }
@ -281,7 +284,7 @@ final private[worker] class StorageManager(conf: CelebornConf, workerSource: Abs
      if (dirs.isEmpty) {
        val shuffleDir =
          new Path(new Path(hdfsDir, conf.workerWorkingDir), s"$appId/$shuffleId")
-        FileSystem.mkdirs(StorageManager.hdfsFs, shuffleDir, hdfsPermission)
+        FileSystem.mkdirs(StorageManager.hadoopFs, shuffleDir, hdfsPermission)
        val fileInfo =
          new FileInfo(new Path(shuffleDir, fileName).toString, userIdentifier, partitionType)
        val hdfsWriter = partitionType match {
@ -420,7 +423,16 @@ final private[worker] class StorageManager(conf: CelebornConf, workerSource: Abs
          deleteDirectory(file, diskOperators.get(diskInfo.mountPoint))
        }
      }
-      hdfsInfos.foreach(item => item._2.deleteAllFiles(StorageManager.hdfsFs))
+      hdfsInfos.foreach(item => item._2.deleteAllFiles(StorageManager.hadoopFs))
      if (!hdfsInfos.isEmpty) {
        try {
          StorageManager.hadoopFs.delete(
            new Path(new Path(hdfsDir, conf.workerWorkingDir), s"$appId/$shuffleId"),
            true)
        } catch {
          case e: Exception => logWarning("Clean expired hdfs shuffle failed.", e)
        }
      }
    }
  }
@ -465,14 +477,14 @@ final private[worker] class StorageManager(conf: CelebornConf, workerSource: Abs
      }
    }
-    if (hdfsFs != null) {
+    if (hadoopFs != null) {
      val hdfsWorkPath = new Path(hdfsDir, conf.workerWorkingDir)
-      if (hdfsFs.exists(hdfsWorkPath)) {
+      if (hadoopFs.exists(hdfsWorkPath)) {
-        val iter = hdfsFs.listFiles(hdfsWorkPath, false)
+        val iter = hadoopFs.listFiles(hdfsWorkPath, false)
        while (iter.hasNext) {
          val fileStatus = iter.next()
          if (fileStatus.getModificationTime < expireTime) {
-            hdfsFs.delete(fileStatus.getPath, true)
+            hadoopFs.delete(fileStatus.getPath, true)
          }
        }
      }
@ -528,7 +540,7 @@ final private[worker] class StorageManager(conf: CelebornConf, workerSource: Abs
          }
        }
-      val hdfsCleaned = hdfsFs match {
+      val hdfsCleaned = hadoopFs match {
        case hdfs: FileSystem =>
          val hdfsWorkPath = new Path(hdfsDir, conf.workerWorkingDir)
          // hdfs path not exist when first time initialize
@ -670,5 +682,5 @@ final private[worker] class StorageManager(conf: CelebornConf, workerSource: Abs
 }
 object StorageManager {
-  var hdfsFs: FileSystem = _
+  var hadoopFs: FileSystem = _
 }