我正在 ubuntu 16.04 linux 上使用 C++ 开发文件爬虫。
文件爬虫是我对Andrew Tanenbaum的现代操作系统第4版
书中的一个问题的解决方案,内容如下:
Write a program that starts at a given directory and descends the file tree from that point recording the sizes of all the files it finds. When it is all done, it should print a histogram of the file sizes using a bin width specified as a parameter (e.g., with 1024, file sizes of 0 to 1023 go in one bin, 1024 to 2047 go in the next bin, etc.).
它将exename、目录和bin大小作为参数,并爬行目录,将文件添加到存储为双向链表的bin中。
例如,如果binWidth或(argv[2])
为10,则它将在Bin 0中存储字节大小为0-9的文件,如果您找到大小为10或更大的文件,它会创建一个大小为10-18的新节点并将其存储在那里并将其标记为Bin 1。
如果我有一个嵌套在另一个目录内的目录,我需要打开它以递归地遍历文件,则会出现我的问题。我有一个函数 traverseNewDirectory
,旨在递归地遍历并找到它们。
我相信该错误存在于我的复制构造函数中。
我的代码如下:
// Directory crawler
// Written by Kaz
#include<iostream>
#include <dirent.h>
#include<string.h>
#include <errno.h>
#include <stdio.h>
#include<string>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include<stdlib.h>
using namespace std;
int binCount = 0; // count of total bins which are nodes in the linked-list
struct node{
int count, name, min, max;
node* next, *prev;
node(){
name = binCount;
count = 0;
min = 0;
max = 0;
prev = NULL;
next = NULL;
}
node(node *other){
if(other == NULL){
}
else{
node* objCopy = other;
node* temp = this;
while(objCopy != NULL){
temp->next = new node;
temp->next->name = objCopy->name;
temp->next->count = objCopy->count;
temp->next->min = objCopy->min;
temp->next->max = objCopy->max;
temp->next->prev = objCopy->prev;
temp = temp->next;
objCopy = objCopy->next;
}
}
}
};
/*
void nextNode(node* previousNode, int binWidth){
node *nextLink = new node;
nextLink->count = 1;
nextLink->min = previousNode->max + 1;
nextLink->max = previousNode->max + binWidth;
nextLink->prev = previousNode;
previousNode ->next = nextLink;
}
*/
node* traverseNewDirectory(node *here, const char *dirName, int binWidth){
DIR * nwd;
struct dirent *dip;
node * current = new node(here);
// Deep copy?
//current = here;
bool isadirectory,isHidden;
if((nwd = opendir(dirName))== NULL){
perror("Can't open derived directory");
return NULL;
}
while ((dip = readdir(nwd)) != NULL){
isadirectory = false;
isHidden = false;
if((dip -> d_type) == DT_UNKNOWN ){
struct stat stbuf;
stat(dip->d_name, &stbuf);
isadirectory = S_ISDIR(stbuf.st_mode);
}
else if((dip -> d_type) == DT_DIR ){
if((strcmp(dip->d_name, ".") == 0) || (strcmp(dip->d_name, "..")) == 0){
isHidden = true;
isadirectory = true;
}
else{
isadirectory = true;
}
}
else{
if((dip-> d_reclen <= current->max)&&(dip->d_reclen >=current->min)){
current->count = current->count+1;
}
else if(dip->d_reclen < current->min){
node*temp = current->prev;
while(temp != NULL){
if((dip-> d_reclen <= temp->max)&&(dip->d_reclen >=temp->min)){
temp->count = temp->count+1;
break;
}
else if(dip->d_reclen < temp->min){
temp = temp->prev;
}
}
}
else{
current->next = new node;
current->next->count = 1;
current->next->min = current->max + 1;
current->next->max = current->max + binWidth;
current->next->prev = current;
current = current->next;
binCount++;
}
}
if(isadirectory){
string path = string(dirName) + "/"+dip->d_name;
/*
strcpy(path,dirName);
strcat(path, "/");
strcat(path,dip->d_name);
strcat(path, "\0");
*/
if(isHidden == true){}
else{
current->next = new node(traverseNewDirectory(current, path.c_str(), binWidth));
if(current->next != NULL){
current = current->next;
binCount++;
}
}
}
}
while ( ( closedir (nwd) == -1) && ( errno == EINTR) );
if(current == here){
return NULL;
}
else{
return current;
}
}
void printHistogram(node *head){
node*temp;
temp = head;
while(temp!=NULL){
cout << "[B " << temp->name << "] from " << temp->min << " to " << temp->max << " : ";
for(int i = 0; i < temp->count; i++){
cout << "x";
}
cout << endl;
temp = temp->next;
}
}
int main(int argc,char *argv[]){
// Ensures that a valid directory is provided by the cmd line argument
if (argc != 3){
if(argc == 1){
fprintf (stderr, " argc = %d no directory given \n", argc);
return 1;
}
else if(argc == 2){
fprintf (stderr, " argc = %d no size given \n", argc);
return 2;
}
else{
fprintf(stderr, "argc = %d invalid parameters \n", argc);
return 3;
}
}
DIR * cwd; // current working directory pointer
struct dirent *cwdP; // pointer to dirent struct
int binWidth; // variable for the width of the grouping in the histogram
binWidth = atoi(argv[2]);
binWidth = binWidth - 1;
node *first = new node;
first->max = binWidth;
binCount++;
node * current;
current = first;
bool isadirectory,isHidden;
if((cwd = opendir(argv[1]))== NULL){
perror("Can't open main directory");
return 2;
}
while ((cwdP = readdir(cwd)) != NULL){
isadirectory = false;
isHidden = false;
if((cwdP -> d_type) == DT_UNKNOWN ){
struct stat stbuf;
stat(cwdP->d_name, &stbuf);
isadirectory = S_ISDIR(stbuf.st_mode);
}
else if((cwdP -> d_type) == DT_DIR ){
if((strcmp(cwdP->d_name, ".") == 0) || (strcmp(cwdP->d_name, "..")) == 0){
isHidden = true;
isadirectory = true;
}
else{
isadirectory = true;
}
}
else{
if((cwdP-> d_reclen <= current->max)&&(cwdP->d_reclen >=current->min)){
current->count = current->count+1;
}
else if(cwdP->d_reclen < current->min){
node*temp = current->prev;
while(temp != NULL){
if((cwdP-> d_reclen <= temp->max)&&(cwdP->d_reclen >=temp->min)){
temp->count = temp->count+1;
break;
}
else if(cwdP->d_reclen < temp->min){
temp = temp->prev;
}
}
}
else{
/*
nextNode(current,binWidth);
current = current ->next;
//binCount++;
*/
current->next = new node;
current->next->count = 1;
current->next->min = current->max + 1;
current->next->max = current->max + binWidth;
current->next->prev = current;
current = current->next;
binCount++;
}
}
if(isadirectory){
string fullPath = string(argv[1]) + "/" + cwdP ->d_name;
/*
strcpy(path,dirName);
strcat(path, "/");
strcat(path,dip->d_name);
strcat(path, "\0");
*/
if(isHidden == true){}
else{
current->next = new node(traverseNewDirectory(current, fullPath.c_str(), binWidth));
if(current->next != NULL){
current = current->next;
binCount++;
}
}
}
}
while ( ( closedir (cwd) == -1) && ( errno == EINTR) );
printHistogram(first);
return 0;
}
最后编辑
我要非常感谢 Igor、J.H、Toby 和其他所有发表评论的人,他们给了我一些关于如何处理链表的建议。我的代码现在完全解决了这个问题。我能够通过将我的方法从双向链接结构列表简化为仅具有几个指针且没有复制构造函数的单链接结构列表来实现它。尽管所有的答案、建议和技巧都没有给我直接的答案,但它激发了我的创造力,通过坚持不懈和研究,我能够解决这个问题。为此,我要感谢所有花时间查看我的帖子的人。
最佳答案
对于复制构造函数,您可能需要这样的东西:
node(const node& other) {
node* prev = nullptr;
node* cur = this;
const node* old_cur = &other;
for (;;) {
cur->count = old_cur->count;
cur->min = old_cur->min;
cur->max = old_cur->max;
cur->prev = prev;
if (old_cur->next) {
old_cur = old_cur->next;
cur->next = new node();
prev = cur;
cur = cur->next;
} else {
cur->next = nullptr;
break;
}
}
}
尽管不清楚您是否需要任何形式的拷贝或伪拷贝构造函数。 traverseNewDirectory
调用可能应如下所示:
current->next = traverseNewDirectory(...);
(删除新节点
部分)。
关于c++ - 双向链表的内存泄漏,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42651279/