kyuubi/kyuubi-common/pom.xml
Bowen Liang db57e9365d [KYUUBI #6587] Periodically expire temp files and operation logs on server to avoid memeory leak by Files.deleteOnExit
# 🔍 Description
## Issue References 🔗

-

## Describe Your Solution 🔧
Fix the memory leak on server caused by `Files.deleteOnExit`.
For long-running Kyuubi server instances, some operation log files and batch job upload files are marked for deletion at exit using `Files.deleteOnExit`. However, the `files` list within the `DeleteOnExitHook`  by `Files.deleteOnExit` method continuously accumulates file paths without being cleaned up, leading to a memory leak issue.

This PR fix this issue by:
1. introduce a new util `FileExpirationUtils` for similar use of `Files.deleteOnExit`, with exposed method for evict file path from the list to prevent accumulative path list
2. adding a service `TempFileService ` in server module, periodical clean-up the files for operation logging path, uploaded resources and etc. And it evict the paths in `TempFileCleanupUtils` instance after cleanup.
3. add the new config `kyuubi.server.tempFile.expireTime` with a default value of 7 days, to control How often to trigger a file expiration clean-up for stale files

## Types of changes 🔖

- [ ] Bugfix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)

## Test Plan 🧪

#### Behavior Without This Pull Request ⚰️

#### Behavior With This Pull Request 🎉

#### Related Unit Tests

---

# Checklist 📝

- [ ] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html)

**Be nice. Be informative.**

Closes #6587 from bowenliang123/file-expiration.

Closes #6587

e23b72e08 [liangbowen] change to P14D
acaf370e7 [liangbowen] change config name to kyuubi.server.tempFile.expireTime
6c7ddd527 [liangbowen] import
ed1e4d76f [liangbowen] comment: ConcurrentHashMap.newKeySet
fbf73ccb4 [liangbowen] update
34d3fc71c [liangbowen] add guava to common module's dep
49c10e5ef [Bowen Liang] file expiration

Lead-authored-by: Bowen Liang  <liangbowen@gf.com.cn>
Co-authored-by: liangbowen <liangbowen@gf.com.cn>
Co-authored-by: Bowen Liang <liangbowen@gf.com.cn>
Signed-off-by: liangbowen <liangbowen@gf.com.cn>
2024-08-28 17:13:27 +08:00

270 lines
9.8 KiB
XML

<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.kyuubi</groupId>
<artifactId>kyuubi-parent</artifactId>
<version>1.10.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>kyuubi-common_${scala.binary.version}</artifactId>
<packaging>jar</packaging>
<name>Kyuubi Project Common</name>
<url>https://kyuubi.apache.org/</url>
<dependencies>
<dependency>
<groupId>org.apache.kyuubi</groupId>
<artifactId>kyuubi-util-scala_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jul-to-slf4j</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-runtime</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>ST4</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>org.apache.kyuubi</groupId>
<artifactId>kyuubi-relocated-thrift</artifactId>
</dependency>
<dependency>
<groupId>org.apache.kyuubi</groupId>
<artifactId>kyuubi-relocated-hive-service-rpc</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>org.apache.kyuubi</groupId>
<artifactId>kyuubi-util-scala_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minikdc</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.unboundid</groupId>
<artifactId>unboundid-ldapsdk</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatestplus</groupId>
<artifactId>mockito-4-11_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>failureaccess</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.jakewharton.fliptables</groupId>
<artifactId>fliptables</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.vladsch.flexmark</groupId>
<artifactId>flexmark-all</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>${project.basedir}/src/main/resources</directory>
</resource>
<resource>
<filtering>true</filtering>
<!-- Include the properties file to provide the build information. -->
<directory>${project.build.directory}/extra-resources</directory>
</resource>
</resources>
<testResources>
<testResource>
<directory>${project.basedir}/src/test/resources</directory>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>build-info</id>
<goals>
<goal>run</goal>
</goals>
<phase>generate-resources</phase>
<configuration>
<target>
<exec executable="bash" osfamily="unix">
<arg value="${project.basedir}/../build/kyuubi-build-info"></arg>
<arg value="${project.build.directory}/extra-resources"></arg>
<arg value="${project.version}"></arg>
<arg value="${java.version}"></arg>
<arg value="${scala.binary.version}"></arg>
<arg value="${spark.version}"></arg>
<arg value="${hive.version}"></arg>
<arg value="${hadoop.version}"></arg>
<arg value="${flink.version}"></arg>
<arg value="${trino.client.version}"></arg>
</exec>
<exec executable="cmd.exe" osfamily="windows">
<arg value="/c"></arg>
<arg value="${project.basedir}/../build/kyuubi-build-info.cmd"></arg>
<arg value="${project.build.directory}/extra-resources"></arg>
<arg value="${project.version}"></arg>
<arg value="${java.version}"></arg>
<arg value="${scala.binary.version}"></arg>
<arg value="${spark.version}"></arg>
<arg value="${hive.version}"></arg>
<arg value="${hadoop.version}"></arg>
<arg value="${flink.version}"></arg>
<arg value="${trino.client.version}"></arg>
</exec>
</target>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<id>prepare-test-jar</id>
<goals>
<goal>test-jar</goal>
</goals>
<phase>test-compile</phase>
</execution>
</executions>
</plugin>
</plugins>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
</build>
</project>