HDFS-常用API操作

一、Maven

<dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>RELEASE</version>
</dependency>
<dependency><groupId>org.apache.logging.log4j</groupId><artifactId>log4j-core</artifactId><version>2.8.2</version>
</dependency>
<!--  2、Hadoop      -->
<dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId><version>3.2.1</version>
</dependency>
<dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>3.2.1</version>
</dependency>

如果Eclipse/Idea打印不出日志，在控制臺上只顯示：

log4j:WARN No appenders could be found for logger (org.apache.hadoop.util.Shell).  
log4j:WARN Please initialize the log4j system properly.  
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.

可以在項目的src/main/resources目錄下，新建一個文件，命名為“log4j.properties”，在文件中填入

# Root logger: level INFO, attached to the "stdout" appender only.
log4j.rootLogger=INFO, stdout
# Console appender named "stdout".
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Pattern: date, level, [logger name], " - ", message, line separator.
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# File appender named "logfile", writing to target/spring.log.
# NOTE(review): "logfile" is not listed in log4j.rootLogger above, so nothing in this
# file attaches it to a logger; add it to the rootLogger list if file output is wanted.
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n

二、API操作

1、讀取某個目錄下的所有文件

public class CatFiles {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 創建要讀取的文件路徑Path listf = new Path("/testa");// 4 創建FileStatus對象，調用listStatus方法  ***FileStatus stats[]=fs.listStatus(listf);for(int i=0;i<stats.length;i++){System.out.println(stats[i].getPath().toString());}fs.close();// 5 返回成功信息System.out.println(" ps: 目錄文件查找完畢!!!");}
}

2、創建HDFS目錄

public class MkdirList {public static void main(String[] args) throws IOException, InterruptedException{// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration conf = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), conf, "root");// 3 創建一個目錄 *****Path path = new Path("/List");fs.mkdirs(path);// 4 關閉流fs.close();// 5 返回創建成功信息System.out.println(" ps: 目錄創建成功!!!");}}

3、判斷文件是否存在

public class ifExistsFlie {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 創建要讀取的文件路徑Path File = new Path("/testa");// 4 調用exists方法  返回boolean類型 ***boolean isExists = fs.exists(File);System.out.println(isExists);fs.close();// 5 返回成功信息System.out.println(" ps: 確認是否存在完畢!!!");}
}

4、查找某個文件的狀態信息

ublic class FindFile {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 對文件名進行操作   ***Path File = new Path("/testa/part-m-00000");// 4 創建FileStatus對象，調用getFileStatus方法FileStatus filestatus = fs.getFileStatus(File);System.out.println(filestatus);// 5 返回成功信息System.out.println(" ps: 查找信息成功!!!");}

FileStatus字段解析

private Path path;                  - Path路徑
private long length;                - 文件長度
private boolean isdir;              - 是不是目錄
private short block_replication;    - 塊的復本數
private long blocksize;             - 塊大小
private long modification_time;     - 修改時間
private long access_time;           - 訪問時間
private FsPermission permission;    - 權限
private String owner;               - 所有者
private String group;               - 所在組
private Path symlink;               - 符號鏈接,如果isdir為true那麼symlink必須為null

5、上傳本地文件

public class UploadFiles {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 創建源目的文件路徑和文件上傳操作 *****Path src = new Path("src/main/resources/HdfsCommand.txt");Path dst = new Path("/List/HdfsCommand.txt");fs.copyFromLocalFile(src, dst);// 4 關閉流fs.close();// 5 返回創建成功信息System.out.println(" ps: 文件上傳成功!!!");}
}

6、文件拷貝到本地

public class CopyFile {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 創建源目的文件路徑和文件上傳操作 *****Path src = new Path("/testa/part-m-00000");Path dst = new Path("src/main/");fs.copyToLocalFile(src,dst);// 4 關閉流fs.close();// 5 返回創建成功信息System.out.println(" ps: 文件拷貝成功!!!!");}
}

7、刪除文件/目錄

public class DeleteFile {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 將要刪除的文件/目錄路徑String File = "/List";// 4 刪除文件 返回boolean類型   ***fs.delete(new Path(File), true);fs.close();// 5 返回成功信息System.out.println(" ps: 文件刪除成功!!!");}
}

8、讀文件

public class ReadFile {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 文件路徑Path File = new Path("hdfs://Carlota1:9000/b.txt");// 4 創建FSDataInputStream對象FSDataInputStream in = fs.open(File);// 6 讀取數據String info = in.readUTF();System.out.println(info);// 7 關閉流fs.close();// 8 返回創建成功信息System.out.println(" ps: 文件讀取數據成功!!!");}}

9、重命名文件/目錄

public class RenameFile {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 對文件名進行操作   ***Path old = new Path("/test");Path now = new Path("/testa");// 4 調用hdfs的rename重命名方法，返回值為boolean類型    ***fs.rename(old, now);// 5 返回成功信息System.out.println(" ps: 文件重命名成功!!!");}}

10、寫文件

public class WriteFile {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 文件路徑 ***Path File=new Path("hdfs://Carlota1:9000/b.txt");// 4 創建FSDataOutputStream對象 ***FSDataOutputStream out = fs.create(File);// 6 寫入數據 ***out.writeUTF("Hello world!！");// 7 關閉流fs.close();// 8 返回創建成功信息System.out.println(" ps: 文件寫入數據成功!!");}}

11、返回文件/目錄上次修改的時間

public class ReviseTime {public static void main(String[] args) throws IOException, InterruptedException {// 0 自動快速地使用缺省Log4j環境。BasicConfigurator.configure();// 1 獲取文件系統Configuration configuration = new Configuration();// 2 配置在集群上運行FileSystem fs = FileSystem.get(URI.create("hdfs://Carlota1:9000"), configuration, "root");// 3 創建要讀取的文件路徑Path File = new Path("/testa");// 4 創建FileStatus對象，調用listStatus方法FileStatus filestatus = fs.getFileStatus(File);// 5 調用getModificationTime方法 返回值類型為longlong time = filestatus.getModificationTime();// 6 轉換long類型為DataSimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");Date date = new Date(time);System.out.println(simpleDateFormat.format(date));fs.close();// 6 返回成功信息System.out.println(" ps: 返回信息成功!!!");}
}