我正在尝试用 Go 编写一个简单的程序,其行为类似于 find | grep。整个程序已经可以运行,并按以下模式使用 goroutine:
goroutine(filech <- 找到的每个文件)
goroutine(按扩展名把文件归类存储 <- grepch)
针对 filech 中每个文件各启动一个 goroutine(grepch <- 如果文件包含指定字符串)
这一切都按预期工作,但当文件数量很大时,内存会不断增长。我研究了 Go 提供的一些性能分析(profiling)工具,但仍然不知道如何定位内存泄漏。我只能确定内存主要是被 bytes.makeSlice 占用的。
谁能看看下面的代码,指出我哪里做错了?除了想知道代码的问题所在,我也想学会今后如何自己调试这类问题,所以如果你能针对这类问题给出详细的性能分析步骤,我将非常感谢。
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"regexp"
"runtime/pprof"
"strings"
"sync"
)
// Command-line options; main binds them to flags.
var (
	topDir string // -d: directory where the search starts
	cProf  bool   // -c: write a CPU profile
	mProf  bool   // -m: write a heap profile
)

// Profile output files, assigned by cProfFunc and mProfFunc.
var (
	cProfFile *os.File
	mProfFile *os.File
)

// Results collected while the program runs.
var (
	fileNames []string            // every path received from the file finder
	fileTypes []string            // category names, in the order printSummary reports them
	fileLists map[string][]string // category name -> paths whose content matched
)

// File-name patterns consulted by getFileCategory.
var (
	cMatch      = regexp.MustCompile(`(?i)^.*\.(?:c|h|cc|cpp|c\+\+|hpp)$`)
	javaMatch   = regexp.MustCompile(`(?i)^.*\.(?:java|js)$`)
	goMatch     = regexp.MustCompile(`(?i)^.*\.(?:go)$`)
	buildMatch  = regexp.MustCompile(`(?i)^.*\.(?:gradle|mk|mka)$`)
	buildMatch2 = regexp.MustCompile(`^.*/(?:Makefile[^/\\]*)$`)
)

// regMatch is the pattern grepOverChan looks for inside each file's content.
var regMatch = regexp.MustCompile(`(?i)(?:test|debug)`)
// init allocates fileLists before main runs, so entries can be stored in it
// without first checking for a nil map.
func init() {
	fileLists = map[string][]string{}
}
// main parses the command-line flags, walks topDir in a background goroutine,
// greps every file that is found, sorts the matching files into categories,
// and finally prints a summary and (optionally) writes a heap profile.
//
// NOTE: this version starts one grep goroutine per file with no upper bound.
// Each of those goroutines reads its whole file into memory (see
// grepOverChan), so on a large tree the number of files held in memory at
// once is unbounded. That behaviour is intentionally left as is here.
func main() {
	flag.StringVar(&topDir, "d", ".", "The top level directory to process (default is current directory)")
	flag.BoolVar(&cProf, "c", false, "Include if you want to save the CPU profile")
	flag.BoolVar(&mProf, "m", false, "Include if you want to save the MEM profile")
	flag.Parse()

	cProfFunc()
	// Deferred calls run last-in-first-out: the profile must be stopped (and
	// thereby flushed) before its file is closed, so Close is registered
	// first. StopCPUProfile does nothing when no profile is running, and
	// Close on a nil *os.File only returns an error.
	defer cProfFile.Close()
	defer pprof.StopCPUProfile()

	getFilesChan := make(chan string, 1000)
	grepFilesChan := make(chan string, 100)
	go getFileNamesOverChan(topDir, getFilesChan)

	fileTypes = append(fileTypes, "C", "Java", "Go", "Build", "Uncategorized")

	// A single goroutine owns fileLists while the greps are running.
	var categorizeWg sync.WaitGroup
	categorizeWg.Add(1)
	go func() {
		defer categorizeWg.Done()
		for grepResult := range grepFilesChan {
			if grepResult == "" {
				continue
			}
			fmt.Printf("Found file %s with text\n", grepResult)
			fileType := getFileCategory(grepResult)
			fileLists[fileType] = append(fileLists[fileType], grepResult)
		}
	}()

	var grepWg sync.WaitGroup
	for fileResult := range getFilesChan {
		if fileResult == "" {
			continue
		}
		fileNames = append(fileNames, fileResult)
		grepWg.Add(1)
		go func(file string, ch chan string) {
			defer grepWg.Done()
			fmt.Printf("Grepping file %s\n", file)
			grepOverChan(file, ch)
		}(fileResult, grepFilesChan)
	}

	// Only once every grep has finished is it safe to close the result
	// channel, which in turn lets the categorizing goroutine finish.
	grepWg.Wait()
	close(grepFilesChan)
	categorizeWg.Wait()

	printSummary()
	mProfFunc()
}
// cProfFunc starts CPU profiling into cpu_profile.pprof when cProf is set;
// otherwise it does nothing.
//
// On success the open file is stored in cProfFile so that the caller can stop
// the profile and close the file later. If the file cannot be created or the
// profiler cannot be started, the problem is reported on stderr, cProfFile is
// left untouched and the program carries on without a CPU profile.
func cProfFunc() {
	if !cProf {
		return
	}
	f, err := os.Create("cpu_profile.pprof")
	if err != nil {
		fmt.Fprintf(os.Stderr, "could not create CPU profile: %v\n", err)
		return
	}
	if err := pprof.StartCPUProfile(f); err != nil {
		fmt.Fprintf(os.Stderr, "could not start CPU profile: %v\n", err)
		f.Close()
		return
	}
	cProfFile = f
}
// mProfFunc writes a heap profile to mem_profile.pprof when mProf is set;
// otherwise it does nothing.
//
// The file is stored in mProfFile and is closed again before the function
// returns. Failures to create, write or close the file are reported on
// stderr instead of being silently dropped; none of them stops the program.
func mProfFunc() {
	if !mProf {
		return
	}
	f, err := os.Create("mem_profile.pprof")
	if err != nil {
		fmt.Fprintf(os.Stderr, "could not create memory profile: %v\n", err)
		return
	}
	mProfFile = f
	if err := pprof.WriteHeapProfile(f); err != nil {
		fmt.Fprintf(os.Stderr, "could not write memory profile: %v\n", err)
	}
	if err := f.Close(); err != nil {
		fmt.Fprintf(os.Stderr, "could not close memory profile: %v\n", err)
	}
}
// printSummary writes the final report to stdout: the number of files that
// were processed, followed by one line per entry of fileTypes giving how many
// files of that category contained the text.
func printSummary() {
	processed := len(fileNames)
	fmt.Printf("\n\nProcessed %d Files\n\n", processed)
	fmt.Println("")
	fmt.Println("Found text in the following files:")
	for i := 0; i < len(fileTypes); i++ {
		category := fileTypes[i]
		matches := fileLists[category]
		fmt.Printf("Found text in %d %s Files\n", len(matches), category)
	}
	// Disabled per-file listing, kept for reference:
	//
	//	for _, fType := range fileTypes {
	//		if len(fileLists[fType]) > 0 {
	//			fmt.Println("")
	//			fmt.Printf("\t%s Files:\n", fType)
	//		}
	//		for _, fileName := range fileLists[fType] {
	//			fmt.Printf("\t\t%s\n", fileName)
	//		}
	//	}
}
// getFileNamesOverChan sends the path of every file found below directory on
// ch and closes ch when it is done, so the receiver can simply range over the
// channel.
//
// directory is checked with os.Lstat and must be a directory itself. If it
// does not exist or is not a directory, the reason is reported on stderr and
// ch is closed without anything having been sent.
func getFileNamesOverChan(directory string, ch chan string) {
	// One deferred close covers every return path below.
	defer close(ch)

	fmt.Printf("Finding files in directory %s\n", directory)
	dirInfo, err := os.Lstat(directory)
	if err != nil {
		fmt.Fprintf(os.Stderr, "cannot search %s: %v\n", directory, err)
		return
	}
	if !dirInfo.IsDir() {
		fmt.Fprintf(os.Stderr, "cannot search %s: not a directory\n", directory)
		return
	}
	recursiveGetFilesOverChan(directory, ch)
}
// recursiveGetFilesOverChan walks dir depth-first and sends the path of every
// regular file it finds on ch. It never closes ch.
//
// Directories named CVS or .git are not descended into, and any file whose
// path contains a CVS or .git directory component is skipped. Symbolic links,
// named pipes and other non-regular entries are ignored. A directory that
// cannot be opened or read is reported on stderr and skipped; the walk then
// continues with the remaining directories.
func recursiveGetFilesOverChan(dir string, ch chan string) {
	dirFile, err := os.Open(dir)
	if err != nil {
		fmt.Fprintf(os.Stderr, "skipping directory %s: %v\n", dir, err)
		return
	}
	entries, err := dirFile.Readdir(0)
	// Readdir(0) returns everything in one slice, so the handle can be
	// released now instead of being held open for the whole recursion.
	dirFile.Close()
	if err != nil {
		// Entries read before the error are still returned; process them.
		fmt.Fprintf(os.Stderr, "reading directory %s: %v\n", dir, err)
	}

	// Build the exclusion markers from the real separator so the filter
	// matches the paths assembled below on every platform.
	sep := string(os.PathSeparator)
	cvsPart := sep + "CVS" + sep
	gitPart := sep + ".git" + sep

	for _, entry := range entries {
		name := entry.Name()
		filePath := dir + sep + name
		mode := entry.Mode()
		switch {
		case mode.IsDir():
			// Everything below these directories would be filtered out
			// anyway, so do not walk them at all.
			if name == "CVS" || name == ".git" {
				continue
			}
			recursiveGetFilesOverChan(filePath, ch)
		case mode.IsRegular():
			// Still needed for the case where dir itself lies inside a CVS
			// or .git directory.
			if strings.Contains(filePath, cvsPart) || strings.Contains(filePath, gitPart) {
				continue
			}
			fmt.Printf("Found File %s\n", filePath)
			ch <- filePath
		}
	}
}
// getFileCategory returns the category name for a file path: "C", "Java",
// "Go" or "Build" for the first pattern (tested in that order) that matches,
// and "Uncategorized" when none does.
func getFileCategory(file string) string {
	if cMatch.MatchString(file) {
		return "C"
	}
	if javaMatch.MatchString(file) {
		return "Java"
	}
	if goMatch.MatchString(file) {
		return "Go"
	}
	// Two separate patterns describe build files.
	if buildMatch.MatchString(file) || buildMatch2.MatchString(file) {
		return "Build"
	}
	return "Uncategorized"
}
// grepOverChan reads the whole of file f into memory and sends f on ch when
// its content matches regMatch. Nothing is sent for a file that does not
// match.
//
// A file that cannot be read is reported on stderr and treated as not
// matching, rather than being skipped without any trace.
func grepOverChan(f string, ch chan string) {
	fileBytes, err := ioutil.ReadFile(f)
	if err != nil {
		fmt.Fprintf(os.Stderr, "cannot read %s: %v\n", f, err)
		return
	}
	if regMatch.Match(fileBytes) {
		ch <- f
	}
}
最佳答案
根据 @JimB 对我问题的评论,我弄清楚了这并不是真正意义上的内存泄漏,而是并发数量不受限制的问题。我的原始代码每遇到一个文件就立即启动一个 grep goroutine,没有任何上限;而每个 goroutine 都会用 ioutil.ReadFile 把整个文件读入内存,因此同时驻留在内存中的文件内容也就没有上限。
我通过限制任一时刻被 grep 打开的文件数量解决了这个问题,做法参考了 http://jmoiron.net/blog/limiting-concurrency-in-go/ 中的示例。该文章创建了一个带缓冲的 semaphoreChan,它最多只能容纳限定数量的消息:打开文件之前向该 channel 写入一个值,搜索完文件之后再从该 channel 读出一个值;最后把 semaphoreChan 重新填满,以此等待所有 goroutine 结束。
下面是与我原先有问题的代码相对应的可工作代码(相关部分请参见 grepConcurrencyLimit 和 semaphoreChan):
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"regexp"
"runtime/pprof"
"strings"
"sync"
)
// Command-line options; main binds them to flags.
var (
	topDir               string // -d: directory where the search starts
	grepConcurrencyLimit int    // -l: maximum number of files grepped at the same time
	cProf                bool   // -c: write a CPU profile
	mProf                bool   // -m: write a heap profile
)

// Profile output files, assigned by cProfFunc and mProfFunc.
var (
	cProfFile *os.File
	mProfFile *os.File
)

// Results collected while the program runs.
var (
	fileNames []string            // every path received from the file finder
	fileTypes []string            // category names, in the order printSummary reports them
	fileLists map[string][]string // category name -> paths whose content matched
)

// File-name patterns consulted by getFileCategory.
var (
	cMatch      = regexp.MustCompile(`(?i)^.*\.(?:c|h|cc|cpp|c\+\+|hpp)$`)
	javaMatch   = regexp.MustCompile(`(?i)^.*\.(?:java|js)$`)
	goMatch     = regexp.MustCompile(`(?i)^.*\.(?:go)$`)
	buildMatch  = regexp.MustCompile(`(?i)^.*\.(?:gradle|mk|mka)$`)
	buildMatch2 = regexp.MustCompile(`^.*/(?:Makefile[^/\\]*)$`)
)

// regMatch is the pattern grepOverChan looks for inside each file's content.
var regMatch = regexp.MustCompile(`(?i)(?:test|debug)`)
// init allocates fileLists before main runs, so entries can be stored in it
// without first checking for a nil map.
func init() {
	fileLists = map[string][]string{}
}
// main parses the command-line flags, walks topDir in a background goroutine,
// greps the files that are found with at most grepConcurrencyLimit greps
// running at once, sorts the matching files into categories, and finally
// prints a summary and (optionally) writes a heap profile.
func main() {
	flag.StringVar(&topDir, "d", ".", "The top level directory to process (default is current directory)")
	flag.IntVar(&grepConcurrencyLimit, "l", 50, "The limit of number of files to grep at any one time")
	flag.BoolVar(&cProf, "c", false, "Include if you want to save the CPU profile")
	flag.BoolVar(&mProf, "m", false, "Include if you want to save the MEM profile")
	flag.Parse()

	// A limit of 0 would make semaphoreChan unbuffered, so the first send
	// below would block forever; a negative limit would make make() panic.
	if grepConcurrencyLimit < 1 {
		fmt.Fprintf(os.Stderr, "invalid concurrency limit %d, using 1 instead\n", grepConcurrencyLimit)
		grepConcurrencyLimit = 1
	}

	cProfFunc()
	// Deferred calls run last-in-first-out: the profile must be stopped (and
	// thereby flushed) before its file is closed, so Close is registered
	// first. StopCPUProfile does nothing when no profile is running, and
	// Close on a nil *os.File only returns an error.
	defer cProfFile.Close()
	defer pprof.StopCPUProfile()

	getFilesChan := make(chan string, 1000)
	grepFilesChan := make(chan string, 100)
	// This channel is to ensure that only grepConcurrencyLimit files are ever grepped at any one time
	semaphoreChan := make(chan bool, grepConcurrencyLimit)
	go getFileNamesOverChan(topDir, getFilesChan)

	fileTypes = append(fileTypes, "C", "Java", "Go", "Build", "Uncategorized")

	// A single goroutine owns fileLists while the greps are running.
	var categorizeWg sync.WaitGroup
	categorizeWg.Add(1)
	go func() {
		defer categorizeWg.Done()
		for grepResult := range grepFilesChan {
			if grepResult == "" {
				continue
			}
			fmt.Printf("Found file %s with text\n", grepResult)
			fileType := getFileCategory(grepResult)
			fileLists[fileType] = append(fileLists[fileType], grepResult)
		}
	}()

	var grepWg sync.WaitGroup
	for fileResult := range getFilesChan {
		if fileResult == "" {
			continue
		}
		fileNames = append(fileNames, fileResult)
		grepWg.Add(1)
		// write a boolean to semaphoreChan to take up one of the concurrency
		// limit spots; this blocks while the limit is reached
		semaphoreChan <- true
		go func(file string, ch chan string) {
			// Deferred so the WaitGroup is released on every exit path; it
			// runs after the semaphore slot below has been handed back.
			defer grepWg.Done()
			// read a boolean from semaphoreChan to release one of the
			// concurrency limit spots
			defer func() { <-semaphoreChan }()
			fmt.Printf("Grepping file %s\n", file)
			grepOverChan(file, ch)
		}(fileResult, grepFilesChan)
	}

	// refill semaphoreChan to capacity to wait until all of the final go routines have completed.
	for i := 0; i < cap(semaphoreChan); i++ {
		semaphoreChan <- true
	}
	grepWg.Wait()
	close(grepFilesChan)
	categorizeWg.Wait()

	printSummary()
	mProfFunc()
}
// cProfFunc starts CPU profiling into cpu_profile.pprof when cProf is set;
// otherwise it does nothing.
//
// On success the open file is stored in cProfFile so that the caller can stop
// the profile and close the file later. If the file cannot be created or the
// profiler cannot be started, the problem is reported on stderr, cProfFile is
// left untouched and the program carries on without a CPU profile.
func cProfFunc() {
	if !cProf {
		return
	}
	f, err := os.Create("cpu_profile.pprof")
	if err != nil {
		fmt.Fprintf(os.Stderr, "could not create CPU profile: %v\n", err)
		return
	}
	if err := pprof.StartCPUProfile(f); err != nil {
		fmt.Fprintf(os.Stderr, "could not start CPU profile: %v\n", err)
		f.Close()
		return
	}
	cProfFile = f
}
// mProfFunc writes a heap profile to mem_profile.pprof when mProf is set;
// otherwise it does nothing.
//
// The file is stored in mProfFile and is closed again before the function
// returns. Failures to create, write or close the file are reported on
// stderr instead of being silently dropped; none of them stops the program.
func mProfFunc() {
	if !mProf {
		return
	}
	f, err := os.Create("mem_profile.pprof")
	if err != nil {
		fmt.Fprintf(os.Stderr, "could not create memory profile: %v\n", err)
		return
	}
	mProfFile = f
	if err := pprof.WriteHeapProfile(f); err != nil {
		fmt.Fprintf(os.Stderr, "could not write memory profile: %v\n", err)
	}
	if err := f.Close(); err != nil {
		fmt.Fprintf(os.Stderr, "could not close memory profile: %v\n", err)
	}
}
// printSummary writes the final report to stdout: the number of files that
// were processed, followed by one line per entry of fileTypes giving how many
// files of that category contained the text.
func printSummary() {
	processed := len(fileNames)
	fmt.Printf("\n\nProcessed %d Files\n\n", processed)
	fmt.Println("")
	fmt.Println("Found text in the following files:")
	for i := 0; i < len(fileTypes); i++ {
		category := fileTypes[i]
		matches := fileLists[category]
		fmt.Printf("Found text in %d %s Files\n", len(matches), category)
	}
	// Disabled per-file listing, kept for reference:
	//
	//	for _, fType := range fileTypes {
	//		if len(fileLists[fType]) > 0 {
	//			fmt.Println("")
	//			fmt.Printf("\t%s Files:\n", fType)
	//		}
	//		for _, fileName := range fileLists[fType] {
	//			fmt.Printf("\t\t%s\n", fileName)
	//		}
	//	}
}
// getFileNamesOverChan sends the path of every file found below directory on
// ch and closes ch when it is done, so the receiver can simply range over the
// channel.
//
// directory is checked with os.Lstat and must be a directory itself. If it
// does not exist or is not a directory, the reason is reported on stderr and
// ch is closed without anything having been sent.
func getFileNamesOverChan(directory string, ch chan string) {
	// One deferred close covers every return path below.
	defer close(ch)

	fmt.Printf("Finding files in directory %s\n", directory)
	dirInfo, err := os.Lstat(directory)
	if err != nil {
		fmt.Fprintf(os.Stderr, "cannot search %s: %v\n", directory, err)
		return
	}
	if !dirInfo.IsDir() {
		fmt.Fprintf(os.Stderr, "cannot search %s: not a directory\n", directory)
		return
	}
	recursiveGetFilesOverChan(directory, ch)
}
// recursiveGetFilesOverChan walks dir depth-first and sends the path of every
// regular file it finds on ch. It never closes ch.
//
// Directories named CVS or .git are not descended into, and any file whose
// path contains a CVS or .git directory component is skipped. Symbolic links,
// named pipes and other non-regular entries are ignored. A directory that
// cannot be opened or read is reported on stderr and skipped; the walk then
// continues with the remaining directories.
func recursiveGetFilesOverChan(dir string, ch chan string) {
	dirFile, err := os.Open(dir)
	if err != nil {
		fmt.Fprintf(os.Stderr, "skipping directory %s: %v\n", dir, err)
		return
	}
	entries, err := dirFile.Readdir(0)
	// Readdir(0) returns everything in one slice, so the handle can be
	// released now instead of being held open for the whole recursion.
	dirFile.Close()
	if err != nil {
		// Entries read before the error are still returned; process them.
		fmt.Fprintf(os.Stderr, "reading directory %s: %v\n", dir, err)
	}

	// Build the exclusion markers from the real separator so the filter
	// matches the paths assembled below on every platform.
	sep := string(os.PathSeparator)
	cvsPart := sep + "CVS" + sep
	gitPart := sep + ".git" + sep

	for _, entry := range entries {
		name := entry.Name()
		filePath := dir + sep + name
		mode := entry.Mode()
		switch {
		case mode.IsDir():
			// Everything below these directories would be filtered out
			// anyway, so do not walk them at all.
			if name == "CVS" || name == ".git" {
				continue
			}
			recursiveGetFilesOverChan(filePath, ch)
		case mode.IsRegular():
			// Still needed for the case where dir itself lies inside a CVS
			// or .git directory.
			if strings.Contains(filePath, cvsPart) || strings.Contains(filePath, gitPart) {
				continue
			}
			fmt.Printf("Found File %s\n", filePath)
			ch <- filePath
		}
	}
}
// getFileCategory returns the category name for a file path: "C", "Java",
// "Go" or "Build" for the first pattern (tested in that order) that matches,
// and "Uncategorized" when none does.
func getFileCategory(file string) string {
	if cMatch.MatchString(file) {
		return "C"
	}
	if javaMatch.MatchString(file) {
		return "Java"
	}
	if goMatch.MatchString(file) {
		return "Go"
	}
	// Two separate patterns describe build files.
	if buildMatch.MatchString(file) || buildMatch2.MatchString(file) {
		return "Build"
	}
	return "Uncategorized"
}
// grepOverChan reads the whole of file f into memory and sends f on ch when
// its content matches regMatch. Nothing is sent for a file that does not
// match.
//
// A file that cannot be read is reported on stderr and treated as not
// matching, rather than being skipped without any trace.
func grepOverChan(f string, ch chan string) {
	fileBytes, err := ioutil.ReadFile(f)
	if err != nil {
		fmt.Fprintf(os.Stderr, "cannot read %s: %v\n", f, err)
		return
	}
	if regMatch.Match(fileBytes) {
		ch <- f
	}
}
关于 Golang 程序内存泄漏的问题,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/50186147/