Java去除源代码注释
总体思路是对待分析的带注释段的字符串进行遍历,声明一个缓冲字符串变量来记录非注释的部分,最后返回这个缓冲字符串变量作为结果
1.首先考虑/*comment*/形式的注释
当遇到/*部分便停止记录,继续往后遍历到*/部分,实现跳过/**/段
2.考虑/*comment/*inside*/out*/形式的嵌套注释
声明一个整型计数器来记录当前/*的嵌套深度,遇到一个/*就+1,遇到一个*/就-1,计数器回到0时才表示注释结束,从而实现嵌套匹配
3.考虑双斜杠注释
发现//形式的字符串的时候表明遇到了双斜杠注释,这时候使用while循环继续向后遍历,直到发现一个换行符,从而跳过这一行中//之后的部分。注意循环中还要判断是否已经到达字符串末尾,否则当最后一行注释后面没有换行符时会发生下标越界
4.考虑双引号
双引号中的注释部分是不能去掉的,比如print("//Hello\"World\"/*comment*/");
所以上面几条所考虑的情况都应该是在双引号范围之外,所以应该最先匹配双引号。如果没有出现双引号,则按照上面的规则处理
如果发现了开始双引号,在匹配结束双引号的时候要注意可能会遇到转义双引号(\"),需要跳过前面带有反斜杠\的双引号,从而匹配到正确的结束双引号。还要注意反斜杠本身也可能被转义(\\),这时紧随其后的双引号才是真正的结束双引号,所以不能只检查双引号的前一个字符
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;

/**
 * Removes comments from C/Java-like source text.
 *
 * <p>Three entry points are offered, each recognising a little more syntax than the previous
 * one:
 * <ol>
 *   <li>{@link #removeComments(String)} - block comments only;</li>
 *   <li>{@link #removeCommentsWithQuote(String)} - block comments, leaving double-quoted string
 *       literals untouched;</li>
 *   <li>{@link #removeCommentsWithQuoteAndDoubleEscape(String)} - block comments and
 *       double-slash line comments, leaving double-quoted string literals untouched.</li>
 * </ol>
 *
 * <p>Block comments (<code>/* ... *&#47;</code>) are deliberately treated as <em>nestable</em>:
 * every opener inside a comment increases the nesting depth and every closer decreases it. This
 * differs from what the Java and C compilers do.
 *
 * <p>Known limitation: character literals are not recognised, so a double quote written as a
 * character literal is treated as the start of a string literal.
 */
public class main {

    /** File processed by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "E:/main.java";

    /** Sample input: nested block comments inside a string literal (must be left untouched). */
    static final String SAMPLE_COMMENTS_IN_STRING =
            "printf(\"Hello /* a comment /* a comment inside comment */ inside "
                    + "/* another comment inside comment */ string */ world\")";

    /** Sample input: a line comment that merely looks like the start of a block comment. */
    static final String SAMPLE_LINE_COMMENT = "//*no recursion*/* file header */";

    /** Sample input: a small C program mixing nested block comments and a string literal. */
    static final String SAMPLE_PROGRAM =
            "//*no recursion*/* file header***********/************* Sample input program "
                    + "**********/**************/"
                    + "int spawn_workers(int worker_count) {"
                    + "/* The block below is supposed to spawn 100 workers. But it creates many "
                    + "more. Commented until I figure out why for (int i = 0; i < worker_count; "
                    + "++i) { if(!fork()) { /* This is the worker. Start working. */ do_work(); "
                    + "} } */ return 0; /* successfully spawned 100 workers */}"
                    + "int main() {printf(\"Hello /*a comment inside string*/ world\"); "
                    + "int worker_count = 0/*octal number*/144; "
                    + "if (spawn_workers(worker_count) != 0) { exit(-1); } return 0;}";

    /**
     * Reads a source file, strips its comments and prints the result to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the file to process. When absent,
     *             the historical default {@code E:/main.java} is used.
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String source = ReadFileToString(path);
        System.out.println(removeCommentsWithQuoteAndDoubleEscape(source));
    }

    /**
     * Removes (nestable) block comments. String literals and line comments are not recognised.
     *
     * @param code the text to process; must not be {@code null}
     * @return {@code code} without its block comments; an unterminated comment removes
     *         everything up to the end of the input
     */
    public static String removeComments(String code) {
        return strip(code, false, false);
    }

    /**
     * Removes (nestable) block comments while copying double-quoted string literals verbatim,
     * so comment markers inside a string literal are preserved.
     *
     * <p>Backslash escapes inside a literal are honoured pairwise: an escaped quote does not
     * end the literal, whereas a quote that follows an escaped backslash does. A quote that
     * appears inside a block comment is part of the comment and does not start a literal.
     *
     * @param code the text to process; must not be {@code null}
     * @return {@code code} without its block comments
     */
    public static String removeCommentsWithQuote(String code) {
        return strip(code, true, false);
    }

    /**
     * Removes (nestable) block comments and double-slash line comments while copying
     * double-quoted string literals verbatim.
     *
     * <p>A line comment extends up to, but not including, the next line terminator, so the
     * line structure of the input is preserved and tokens on adjacent lines cannot be fused
     * together. A line comment that ends the input without a trailing newline is handled.
     * A double slash inside a block comment or a string literal is not a line comment.
     *
     * @param code the text to process; must not be {@code null}
     * @return {@code code} without its comments
     */
    public static String removeCommentsWithQuoteAndDoubleEscape(String code) {
        return strip(code, true, true);
    }

    /**
     * Reads a whole UTF-8 text file into a string.
     *
     * <p>Every line of the file is followed by a single {@code '\n'} in the result, whatever
     * line terminator the file uses and whether or not the last line is terminated.
     *
     * @param FilePath path of the file to read
     * @return the file content with normalised line endings
     * @throws UncheckedIOException if the file cannot be opened or read
     */
    public static String ReadFileToString(String FilePath) {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(FilePath), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Cannot read file: " + FilePath, e);
        }
        return content.toString();
    }

    /**
     * Single scanner shared by the three public entry points.
     *
     * @param code               the text to process
     * @param keepStringLiterals whether double-quoted literals are copied verbatim
     * @param stripLineComments  whether double-slash comments are removed
     * @return the text that is not part of a comment
     */
    private static String strip(String code, boolean keepStringLiterals,
            boolean stripLineComments) {
        final int length = code.length();
        StringBuilder kept = new StringBuilder(length);
        int depth = 0; // current block-comment nesting depth; 0 means "not in a comment"
        int i = 0;
        while (i < length) {
            char current = code.charAt(i);
            // '\0' stands for "no next character"; it never matches '/' or '*'.
            char next = i + 1 < length ? code.charAt(i + 1) : '\0';

            if (depth > 0) {
                // Inside a block comment only the comment delimiters matter; quotes and
                // double slashes are ordinary comment text here.
                if (current == '*' && next == '/') {
                    depth--;
                    i += 2;
                } else if (current == '/' && next == '*') {
                    depth++;
                    i += 2;
                } else {
                    i++;
                }
            } else if (keepStringLiterals && current == '"') {
                i = copyStringLiteral(code, i, kept);
            } else if (current == '/' && next == '*') {
                depth = 1;
                i += 2;
            } else if (stripLineComments && current == '/' && next == '/') {
                // Stop in front of the line terminator so that it is copied by the next
                // iteration.
                i = endOfLine(code, i + 2);
            } else {
                kept.append(current);
                i++;
            }
        }
        return kept.toString();
    }

    /**
     * Copies the string literal whose opening quote is at {@code start} into {@code out}.
     *
     * @return the index just after the closing quote, or {@code code.length()} when the
     *         literal is unterminated
     */
    private static int copyStringLiteral(String code, int start, StringBuilder out) {
        final int length = code.length();
        out.append(code.charAt(start));
        int i = start + 1;
        while (i < length) {
            char c = code.charAt(i++);
            out.append(c);
            if (c == '\\') {
                // A backslash escapes exactly one following character, so "\\" is a complete
                // escape and does not affect the quote after it.
                if (i < length) {
                    out.append(code.charAt(i++));
                }
            } else if (c == '"') {
                break;
            }
        }
        return i;
    }

    /**
     * Returns the index of the first line terminator character at or after {@code from}, or
     * {@code code.length()} when there is none.
     */
    private static int endOfLine(String code, int from) {
        int i = from;
        while (i < code.length() && code.charAt(i) != '\n' && code.charAt(i) != '\r') {
            i++;
        }
        return i;
    }
}