首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 计算机考试 > 等级考试 > 复习指导 >

使用java将网页保存为mht格式(2)(1)

2009-01-05 
mht格式网页

    //设置网页正文
  MimeBodyPart bp = new MimeBodyPart();
  bp.setText(content, strEncoding);
  bp.addHeader("Content-Type", "text/html;charset=" + strEncoding);
  bp.addHeader("Content-Location", strWeb.toString());
  mp.addBodyPart(bp);
  int urlCount = urlScriptList.size();
  for (int i = 0; i < urlCount; i++) {
  bp = new MimeBodyPart();
  ArrayList urlInfo = (ArrayList) urlScriptList.get(i);
  // String url = urlInfo.get(0).toString();
  String absoluteURL = urlInfo.get(1).toString();
  bp
  .addHeader("Content-Location",
  javax.mail.internet.MimeUtility
  .encodeWord(java.net.URLDecoder
  .decode(absoluteURL, strEncoding)));
  DataSource source = new AttachmentDataSource(absoluteURL, "text");
  bp.setDataHandler(new DataHandler(source));
  mp.addBodyPart(bp);
  }
  urlCount = urlImageList.size();
  for (int i = 0; i < urlCount; i++) {
  bp = new MimeBodyPart();
  ArrayList urlInfo = (ArrayList) urlImageList.get(i);
  // String url = urlInfo.get(0).toString();
  String absoluteURL = urlInfo.get(1).toString();
  bp
  .addHeader("Content-Location",
  javax.mail.internet.MimeUtility
  .encodeWord(java.net.URLDecoder
  .decode(absoluteURL, strEncoding)));
  DataSource source = new AttachmentDataSource(absoluteURL, "image");
  bp.setDataHandler(new DataHandler(source));
  mp.addBodyPart(bp);
  }
  msg.setContent(mp);
  // write the mime multi part message to a file
  msg.writeTo(new FileOutputStream(strFileName));
  }
  /**
   * Converts an MHT (MIME HTML) archive into an HTML file plus a resource directory.
   *
   * <p>The first body part is treated as the HTML page. Every further part that carries a
   * resource URL is written to a directory named {@code <strHtml>.files}, and occurrences of
   * that URL in the page text are replaced with the absolute path of the saved file. The
   * rewritten text is then handed to {@code saveHtml}.
   *
   * <p>Nothing is written when the message content is not a {@code MimeMultipart}, when the
   * HTML text cannot be obtained, or when the resource directory cannot be created. Any
   * exception is printed to standard error and otherwise ignored.
   *
   * @param strMht path of the MHT file to read
   * @param strHtml path of the HTML file to write
   */
  public static void mht2html(String strMht, String strHtml) {
    InputStream fis = null;
    try {
      fis = new FileInputStream(strMht);
      Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
      MimeMessage msg = new MimeMessage(mailSession, fis);
      Object content = msg.getContent();
      // Test for the exact type that is cast to below.
      if (!(content instanceof MimeMultipart)) {
        return;
      }
      MimeMultipart mp = (MimeMultipart) content;
      MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);
      String strEncoding = getEncoding(htmlPart);
      // NOTE(review): getHtmlText is defined outside this view; it is assumed to decode the
      // part with the given encoding and to return null on failure - confirm.
      String strText = getHtmlText(htmlPart, strEncoding);
      if (strText == null) {
        return;
      }
      int partCount = mp.getCount();
      File parent = null;
      if (partCount > 1) {
        // Resources live next to the HTML file in "<html path>.files".
        parent = new File(new File(strHtml).getAbsolutePath() + ".files");
        parent.mkdirs();
        if (!parent.exists()) {
          return;
        }
      }
      // Part 0 is the page itself, so resources start at index 1.
      for (int i = 1; i < partCount; ++i) {
        MimeBodyPart bp = (MimeBodyPart) mp.getBodyPart(i);
        String strUrl = getResourcesUrl(bp);
        if (strUrl == null) {
          continue;
        }
        File resources = new File(parent.getAbsolutePath() + File.separator + getName(strUrl, i));
        // Only rewrite the link when the resource really was saved locally.
        if (saveResourcesFile(resources, bp.getInputStream())) {
          strText = JHtmlClear.replace(strText, strUrl, resources.getAbsolutePath());
        }
      }
      saveHtml(strText, strHtml);
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // The original never closed the MHT input stream, leaking a file handle per call.
      if (fis != null) {
        try {
          fis.close();
        } catch (java.io.IOException e) {
          e.printStackTrace();
        }
      }
    }
  }
  /**
  *方法说明:得到资源文件的name
  *输入参数:strName 资源文件链接, ID 资源文件的序号
  *返回类型:资源文件的本地临时文件名
  */
  public static String getName(String strName, int ID) {
  char separator = ’/’;
  System.out.println(strName);
  System.out.println(separator);
  if( strName.lastIndexOf(separator) >= 0)
  return format(strName.substring(strName.lastIndexOf(separator) + 1));
  return "temp" + ID;
  }
  /**
   * Returns the character encoding declared in a body part's Content-Type header.
   *
   * <p>The header name and the {@code charset=} marker are matched case-insensitively.
   * Parameters that follow the charset value and surrounding double quotes are dropped, so
   * {@code text/html; charset="utf-8"; format=flowed} yields {@code utf-8}. A declared
   * {@code gb2312} is reported as {@code gbk}.
   *
   * @param bp the MIME body part holding the page content; may be {@code null}
   * @return the declared charset, or {@code null} when {@code bp} is {@code null}, no
   *     Content-Type header declares a charset, or the headers cannot be read
   */
  private static String getEncoding(MimeBodyPart bp) {
    if (bp == null) {
      return null;
    }
    try {
      Enumeration headers = bp.getAllHeaders();
      while (headers.hasMoreElements()) {
        javax.mail.Header header = (javax.mail.Header) headers.nextElement();
        // MIME header field names are case-insensitive.
        if (!"Content-Type".equalsIgnoreCase(header.getName())) {
          continue;
        }
        String charset = extractCharset(header.getValue());
        if (charset != null) {
          return charset;
        }
      }
    } catch (MessagingException e) {
      e.printStackTrace();
    }
    return null;
  }

  /** Pulls the charset parameter value out of a Content-Type header value, or returns null. */
  private static String extractCharset(String contentType) {
    if (contentType == null) {
      return null;
    }
    String marker = "charset=";
    int markerLength = marker.length();
    int pos = -1;
    // Case-insensitive search that does not depend on locale-specific lower-casing.
    for (int i = 0; i + markerLength <= contentType.length(); i++) {
      if (contentType.regionMatches(true, i, marker, 0, markerLength)) {
        pos = i;
        break;
      }
    }
    if (pos == -1) {
      return null;
    }
    String charset = contentType.substring(pos + markerLength);
    // Further parameters may follow the charset, separated by ';'.
    int end = charset.indexOf(';');
    if (end != -1) {
      charset = charset.substring(0, end);
    }
    charset = charset.trim();
    int length = charset.length();
    if (length >= 2 && charset.charAt(0) == '"' && charset.charAt(length - 1) == '"') {
      charset = charset.substring(1, length - 1).trim();
    }
    if (charset.length() == 0) {
      return null;
    }
    // GBK is a superset of GB2312, so decoding with it also covers pages mislabeled as gb2312.
    if ("gb2312".equalsIgnoreCase(charset)) {
      return "gbk";
    }
    return charset;
  }
  /**
  *方法说明:得到资源文件url
  *输入参数:bp MimeBodyPart类型的网页内容
  *返回类型:资源文件url
  */

    private static String getResourcesUrl(MimeBodyPart bp) {
  if (bp != null) {
  try {
  Enumeration list = bp.getAllHeaders();
  while (list.hasMoreElements()) {
  javax.mail.Header head = (javax.mail.Header)list.nextElement();
  if (head.getName().compareTo("Content-Location") == 0) {
  return head.getValue();
  }
  }
  } catch (MessagingException e) {
  // TODO Auto-generated catch block
  e.printStackTrace();
  }
  }
  return null;
  }
  /**
  *方法说明:格式化文件名
  *输入参数:strName 文件名
  *返回类型:经过处理的符合命名规则的文件名
  */
  private static String format(String strName) {
  if (strName == null)
  return null;
  strName = strName.replaceAll(" ", " ");
  String strText = "/:*?"<>|^___FCKpd___0quot;;
  for (int i = 0; i < strName.length(); ++i) {
  String ch = String.valueOf(strName.charAt(i));
  if (strText.indexOf(ch) != -1) {
  strName = strName.replace(strName.charAt(i), ’-’);
  }
  }
  return strName;
  }
  /**
   * Copies a resource stream into a local file.
   *
   * <p>The input stream is always closed once copying has been attempted. An output file
   * that received no bytes is deleted and reported as not saved, so that callers do not
   * rewrite links to point at a file that no longer exists.
   *
   * @param resources the file to create; may be {@code null}
   * @param inputStream the stream whose bytes are written to the file; may be {@code null}
   * @return {@code true} when at least one byte was written and all streams closed cleanly;
   *     {@code false} when an argument is {@code null}, the stream was empty, or copying or
   *     closing failed
   */
  private static boolean saveResourcesFile(File resources, InputStream inputStream) {
    if (resources == null || inputStream == null) {
      return false;
    }
    boolean copied = false;
    boolean isEmpty = true;
    BufferedOutputStream out = null;
    try {
      out = new BufferedOutputStream(new FileOutputStream(resources));
      byte[] buffer = new byte[1024];
      int count;
      while ((count = inputStream.read(buffer)) != -1) {
        isEmpty = false;
        out.write(buffer, 0, count);
      }
      // Flush once at the end; a failure here must count as a failed copy.
      out.flush();
      copied = true;
    } catch (Exception e) {
      e.printStackTrace();
      System.out.println("解析mht失败");
    } finally {
      // Close each stream independently so one failing close cannot skip the other,
      // and avoid returning from inside finally, which would mask errors.
      boolean outClosed = closeResourceStream(out);
      boolean inClosed = closeResourceStream(inputStream);
      copied = copied && outClosed && inClosed;
    }
    if (copied && isEmpty) {
      // Delete only after the streams are closed; an empty resource is not worth keeping.
      resources.delete();
      return false;
    }
    return copied;
  }

  /** Closes a stream if it is non-null and reports whether the close succeeded. */
  private static boolean closeResourceStream(java.io.Closeable stream) {
    if (stream == null) {
      return true;
    }
    try {
      stream.close();
      return true;
    } catch (Exception e) {
      e.printStackTrace();
      System.out.println("解析mht失败");
      return false;
    }
  }
  /**

热点排行