wg.Add(1)
go func() {
defer func() { wg.Done() }()
for i := range url {
doc, err := html.Parse(strings.NewReader(ParseHtml(url[i])))
if err != nil {
log.Fatal(err)
}
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr {
if a.Key == "href" {
wg.Add(1)
go func() {
defer func() { wg.Done() }()
for _, value := range ParseEmail(ParseHtml(a.Val)) {
rw := &sync.RWMutex{}
rw.RLock()
_, ok := registryEMAIL[value]
rw.RUnlock()
if !ok {
rw.Lock()
registryEMAIL[value] = 0
rw.Unlock()
c <- value
}
}
}()
break
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(doc)
}
}()
go func() {
wg.Wait()
close(c)
}()
for msg := range c {
fmt.Println(msg)
}
}
// registryMu serializes every access to registry made by ParseHtml.
// ParseHtml is invoked from multiple goroutines, so the lock has to be shared
// between calls; a mutex created inside the function would be private to each
// call and would protect nothing.
var registryMu sync.Mutex

// ParseHtml downloads the page addressed by ur and returns its body as a
// string.
//
// Every distinct ur is fetched at most once: the first call records it in
// registry, and any later call with the same value returns "" immediately.
// The URL is recorded before it is parsed or fetched, so a failed attempt is
// not retried.
//
// If ur has no host, "www.google.com.ua" is used; if it has no scheme,
// "https" is used.
//
// The empty string is returned when ur was already seen, when it cannot be
// parsed, or when the request or reading the response body fails.
func ParseHtml(ur string) string {
	// Check and mark inside one critical section so two goroutines racing on
	// the same URL cannot both decide to fetch it. The lock is released
	// before any network I/O.
	registryMu.Lock()
	_, visited := registry[ur]
	if !visited {
		registry[ur] = 0
	}
	registryMu.Unlock()
	if visited {
		return ""
	}

	u, err := url.Parse(ur)
	if err != nil {
		return ""
	}
	if u.Host == "" {
		u.Host = "www.google.com.ua"
	}
	if u.Scheme == "" {
		u.Scheme = "https"
	}

	res, err := http.Get(u.String())
	if err != nil {
		return ""
	}
	defer res.Body.Close()

	d, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return ""
	}
	return string(d)
}
// emailPattern matches e-mail-like tokens: a local part made of lowercase
// ASCII letters, digits, '_', '.' or '-', an '@', one or more dot-terminated
// domain labels, and a final label of 2 to 6 lowercase letters or digits.
// It is compiled once at package initialization instead of on every
// ParseEmail call.
var emailPattern = regexp.MustCompile(`([a-z0-9_\.\-]+)\@(([a-z0-9\-])+\.)+([a-z0-9]{2,6})`)

// ParseEmail returns all non-overlapping substrings of str that match
// emailPattern, in order of appearance. The result is nil when nothing
// matches. Matching is case-sensitive: uppercase letters are not part of the
// pattern.
func ParseEmail(str string) []string {
	return emailPattern.FindAllString(str, -1)
}
goroutine 5565 [IO wait]:
net.runtime_pollWait(0xb6c4a320, 0x72, 0x1b291000)
/usr/local/go/src/runtime/netpoll.go:160 +0x55
net.(*pollDesc).Wait(0x19c86178, 0x72, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:73 +0x35
net.(*pollDesc).WaitRead(0x19c86178, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:78 +0x33
net.(*netFD).Read(0x19c86140, 0x1b291000, 0x1000, 0x1000, 0x0, 0xb6c19030, 0x1870e0dc)
/usr/local/go/src/net/fd_unix.go:250 +0x19a
net.(*conn).Read(0x18946108, 0x1b291000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/usr/local/go/src/net/net.go:172 +0xb9
net/http.noteEOFReader.Read(0xb6c1e690, 0x18946108, 0x197c9294, 0x1b291000, 0x1000, 0x1000, 0xb9cc00, 0x0, 0x0)
/usr/local/go/src/net/http/transport.go:1687 +0x55
net/http.(*noteEOFReader).Read(0x18930df0, 0x1b291000, 0x1000, 0x1000, 0xb755a000, 0x0, 0x0)
<autogenerated>:284 +0xae
bufio.(*Reader).fill(0x189f3350)
/usr/local/go/src/bufio/bufio.go:97 +0x172
bufio.(*Reader).Peek(0x189f3350, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/bufio/bufio.go:132 +0xad
net/http.(*persistConn).readLoop(0x197c9260)
/usr/local/go/src/net/http/transport.go:1073 +0x13e
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:857 +0xe21
goroutine 5566 [select]:
net/http.(*persistConn).writeLoop(0x197c9260)
/usr/local/go/src/net/http/transport.go:1277 +0x336
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:858 +0xe41
exit status 2
// Sequential crawl: no goroutines are started in this loop. For every entry
// of url the page is fetched and parsed, each <a> element's href is fetched in
// turn, and every e-mail address found on the linked page is printed the
// first time it is seen.
for idx := range url {
	body := ParseHtml(url[idx])
	root, err := html.Parse(strings.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}

	// walk visits node and then all of its descendants, depth first.
	var walk func(*html.Node)
	walk = func(node *html.Node) {
		if node.Type == html.ElementNode && node.Data == "a" {
			for _, attr := range node.Attr {
				if attr.Key != "href" {
					continue
				}
				linked := ParseHtml(attr.Val)
				for _, email := range ParseEmail(linked) {
					// registryEMAIL records addresses already printed.
					if _, seen := registryEMAIL[email]; seen {
						continue
					}
					registryEMAIL[email] = 0
					fmt.Println(email)
				}
				// Only the first href attribute of an element is followed.
				break
			}
		}
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			walk(child)
		}
	}
	walk(root)
}