1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
|
import java.net.*; import java.io.*; import java.util.regex.*; import java.util.*;
/**
 * Breadth-first crawler that scans web pages line by line, prints every
 * e-mail address it finds to standard output and follows the quoted
 * {@code http(s)} links contained in those pages.
 *
 * <p>Pages that cannot be opened or read are skipped; a single bad page never
 * aborts the crawl. Each distinct link is queued at most once, so pages that
 * link to each other do not cause an endless loop.
 */
public class MailCrawler {

    /** Page at which the no-argument {@link #getMail()} crawl starts. */
    private static final String START_URL = "http://tieba.baidu.com/p/3856188322";

    /** Connect/read timeout so that one stalled server cannot hang the crawl. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * E-mail matcher: word characters, {@code @}, then a domain made of at
     * least two dot-separated labels (e.g. {@code user@mail.example.org}).
     */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Link matcher: an {@code http://} or {@code https://} URL that is
     * terminated by a double quote (as in {@code href="..."}). Group 1 is the
     * URL without the closing quote.
     */
    private static final Pattern LINK_PATTERN = Pattern.compile("(https?://[^\"\\s]+)\"");

    /**
     * Program entry point; runs the default crawl.
     *
     * @param args ignored
     * @throws Exception if the built-in start URL cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        getMail();
    }

    /**
     * Crawls from the built-in start page with no page limit, printing each
     * e-mail address occurrence to standard output.
     *
     * @throws Exception if the built-in start URL cannot be parsed
     */
    public static void getMail() throws Exception {
        getMail(new URL(START_URL), Integer.MAX_VALUE);
    }

    /**
     * Crawls breadth-first from {@code startUrl}, fetching at most
     * {@code maxPages} pages. Every e-mail address occurrence is printed to
     * standard output as it is found.
     *
     * @param startUrl first page to fetch; must not be {@code null}
     * @param maxPages maximum number of pages to attempt to fetch; {@code 0}
     *                 fetches nothing
     * @return the distinct e-mail addresses found, in order of first appearance
     * @throws NullPointerException     if {@code startUrl} is {@code null}
     * @throws IllegalArgumentException if {@code maxPages} is negative
     */
    public static Set<String> getMail(URL startUrl, int maxPages) {
        Objects.requireNonNull(startUrl, "startUrl");
        if (maxPages < 0) {
            throw new IllegalArgumentException("maxPages must be >= 0 but was " + maxPages);
        }

        Set<String> emails = new LinkedHashSet<>();
        // Visited links are tracked by their text form: URL.equals/hashCode may
        // resolve host names, which is slow and blocks on the network.
        Set<String> seen = new HashSet<>();
        Queue<URL> pending = new ArrayDeque<>();
        seen.add(startUrl.toExternalForm());
        pending.add(startUrl);

        int fetched = 0;
        URL page;
        while (fetched < maxPages && (page = pending.poll()) != null) {
            fetched++;
            try (BufferedReader reader = open(page)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    for (String email : extractEmails(line)) {
                        System.out.println(email);
                        emails.add(email);
                    }
                    enqueueNewLinks(line, seen, pending);
                }
            } catch (IOException | IllegalArgumentException ignored) {
                // Best-effort crawl: an unreachable, unreadable or unusable page
                // is skipped and the crawl moves on to the next queued page.
            }
        }
        return emails;
    }

    /**
     * Returns every e-mail address occurrence in {@code line}, in order of
     * appearance (duplicates included). Package-private for testing.
     */
    static List<String> extractEmails(String line) {
        List<String> found = new ArrayList<>();
        Matcher matcher = MAIL_PATTERN.matcher(line);
        while (matcher.find()) {
            found.add(matcher.group());
        }
        return found;
    }

    /**
     * Returns every quote-terminated {@code http(s)} link in {@code line},
     * without the closing quote, in order of appearance. Package-private for
     * testing.
     */
    static List<String> extractLinks(String line) {
        List<String> found = new ArrayList<>();
        Matcher matcher = LINK_PATTERN.matcher(line);
        while (matcher.find()) {
            found.add(matcher.group(1));
        }
        return found;
    }

    /** Queues each link of {@code line} that has not been seen before. */
    private static void enqueueNewLinks(String line, Set<String> seen, Queue<URL> pending) {
        for (String link : extractLinks(line)) {
            // add() returns false for an already-known link, so each distinct
            // link is parsed and queued at most once.
            if (!seen.add(link)) {
                continue;
            }
            try {
                pending.add(new URL(link));
            } catch (MalformedURLException ignored) {
                // Not a usable URL; it stays in 'seen' so it is not re-parsed.
            }
        }
    }

    /** Opens {@code page} once, with timeouts, as a UTF-8 line reader. */
    private static BufferedReader open(URL page) throws IOException {
        URLConnection connection = page.openConnection();
        connection.setConnectTimeout(TIMEOUT_MILLIS);
        connection.setReadTimeout(TIMEOUT_MILLIS);
        return new BufferedReader(new InputStreamReader(
                connection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8));
    }
}
|
近期评论