读取 Java 文件中的所有中文
最近在做国际化,需要把代码中的中文提取出来。
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts Chinese text from Java source files, as a first step of internationalization.
 *
 * <p>A directory tree is walked recursively. For every file whose name contains {@code .java},
 * each line is trimmed and, unless it carries one of the skip markers (comment, annotation or
 * logger call), is split on double quotes. Every fragment containing at least one character in
 * the range U+4E00 to U+9FA5 is written to the supplied writer as one record per line:
 *
 * <pre>
 * fileName@@@lineNumber@@@trimmedLine@@@package_class_NNNN@@@fragment
 * </pre>
 *
 * <p>{@code NNNN} is a four-digit counter that increases once per matching line, so all
 * fragments found on the same line share the same key.
 */
public class Process {

    /** Matches a single Chinese character (U+4E00 to U+9FA5). */
    private static final Pattern CHINESE = Pattern.compile("[\\u4e00-\\u9fa5]");

    /** Path marker; the path segment following it is used as the package part of the key. */
    private static final String PROJECT_MARKER = "DQMS";

    /** Separator between the fields of an output record. */
    private static final String FIELD_SEPARATOR = "@@@";

    /** A line containing any of these substrings is never extracted. */
    private static final String[] SKIP_MARKERS = {
        "*", "//", "@ResourcePermissions", "@OptLogAnnotation", "logger.", "<!--"
    };

    private final PrintWriter output;

    /** Encoding used to read the source files; should match the encoding used by the IDE. */
    private final String charset;

    /**
     * @param output  writer that receives the extracted records; the caller owns and closes it
     * @param charset name of the encoding of the scanned files; {@code null} means UTF-8
     */
    public Process(PrintWriter output, String charset) {
        this.output = output;
        this.charset = (charset != null) ? charset : "UTF-8";
    }

    /**
     * Splits a line on double quotes and keeps the fragments that contain Chinese characters.
     *
     * @param names the line to inspect; {@code null} yields an empty list
     * @return the fragments containing at least one Chinese character, in order of appearance
     */
    public List<String> getWord(String names) {
        List<String> word = new ArrayList<>();
        if (names == null) {
            return word;
        }
        for (String fragment : names.split("\"")) {
            if (CHINESE.matcher(fragment).find()) {
                word.add(fragment);
            }
        }
        return word;
    }

    /**
     * Scans one file and writes a record for every Chinese fragment found in it.
     * Files whose name does not contain {@code .java} (after the first character) are ignored.
     *
     * @param tempFile the file to scan
     * @throws IOException if the file cannot be read or the configured charset is unsupported
     */
    public void readTxt(File tempFile) throws IOException {
        String name = tempFile.getName();
        System.out.println(name);
        if (name.indexOf(".java") <= 0) {
            return;
        }
        System.out.println("#" + name + "\n");

        String packageName = packageNameOf(tempFile);
        String fileName = name.substring(0, name.lastIndexOf('.'));

        // try-with-resources closes the reader even when reading fails half-way.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(tempFile), charset))) {
            int lineNumber = 1;
            int keyIndex = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (isExtractable(line)) {
                    String key = packageName + "_" + fileName + "_" + String.format("%04d", keyIndex);
                    for (String fragment : getWord(line)) {
                        output.write(name + FIELD_SEPARATOR + lineNumber + FIELD_SEPARATOR + line
                                + FIELD_SEPARATOR + key + FIELD_SEPARATOR + fragment + "\n");
                    }
                    // The key counter advances per matching line, not per fragment.
                    keyIndex++;
                }
                lineNumber++;
            }
        }
    }

    /**
     * Recursively scans a directory. Entries are visited in sorted name order so the output
     * is deterministic. Nothing happens when {@code folder} is not a directory.
     *
     * @param folder path of the directory to scan
     * @throws IOException if a file below the directory cannot be read
     */
    public void readDir(String folder) throws IOException {
        File dir = new File(folder);
        if (!dir.isDirectory()) {
            return;
        }
        System.out.println("#Dir#" + dir.getName() + "\n");

        String[] children = dir.list();
        if (children == null) {
            // File.list() returns null on an I/O error or when access is denied.
            return;
        }
        Arrays.sort(children);
        for (String child : children) {
            File entry = new File(dir, child);
            if (entry.isDirectory()) {
                readDir(entry.getPath());
            } else {
                readTxt(entry);
            }
        }
    }

    /** Returns true when the line contains Chinese text and none of the skip markers. */
    private static boolean isExtractable(String line) {
        for (String marker : SKIP_MARKERS) {
            if (line.contains(marker)) {
                return false;
            }
        }
        return CHINESE.matcher(line).find();
    }

    /**
     * Derives the package part of the key: the first path segment that follows the project
     * marker plus one separator character. When the marker is absent, or nothing follows it,
     * the name of the parent directory is used instead (empty if there is none).
     */
    private static String packageNameOf(File file) {
        String path = file.getPath();
        int marker = path.indexOf(PROJECT_MARKER);
        int start = marker + PROJECT_MARKER.length() + 1;
        if (marker >= 0 && start < path.length()) {
            // Pattern.quote keeps the Windows separator from being read as a regex escape.
            String[] segments = path.substring(start).split(Pattern.quote(File.separator));
            if (segments.length > 0) {
                return segments[0];
            }
        }
        File parent = file.getParentFile();
        return (parent == null) ? "" : parent.getName();
    }
}
public class Test {//输出文件路径public static String outFile = "D:/ceshi.txt";//输入文件夹路径public static String inFolder = "D:\\\\work";public static String charset = "UTF-8";public static void main(String[] args) throws IOException {String tempString = "import java.io.FileWriter;";tempString = "3试试";Pattern p= Pattern.compile("[\\u4e00-\\u9fa5]");Matcher m = p.matcher(tempString);if (m.find()){System.out.println(tempString);}PrintWriter output = new PrintWriter(new FileWriter(new File(outFile)));Process process = new Process(output, charset);process.readDir(inFolder);output.close();}}