c - 有没有办法规范化不存在文件的文件路径?

标签 c file path posix filepath

我想解析一个当前并不存在的文件的文件名中的所有 "." 和 ".." 引用。

所以,我想做的与 realpath(3) 类似,但有以下例外:

  • 文件名可能引用不存在的文件
  • 不得解析符号链接(symbolic link)(主要是因为路径组件可能不存在)

是否有现有的库代码可以直接调用来实现此目的,还是必须编写新代码?

所以,本质上:

  • a/../b/c.txt 将变为 b/c.txt
  • a/../../b/c.txt 将变为 ../b/c.txt
  • a/./b/./c.txt 将变为 a/b/c.txt

如果有人想知道,我这样做并不是为了安全目的:我正在编写一个允许执行任意 shell 命令的工具,因此安全性不是问题。我只需要一种方法来获得用于比较路径的规范表示。

最佳答案

这是我 30 多年前第一次编写的代码。您需要的代码有两种变体。

该代码可在 Stack Overflow 使用的 CC-by-SA 3.0 许可证下使用 - 您可以在注明出处的情况下使用它。

/*
@(#)File:           $RCSfile: clnpath.c,v $
@(#)Version:        $Revision: 2.19 $
@(#)Last changed:   $Date: 2017/03/26 06:32:49 $
@(#)Purpose:        Clean up pathname (lexical analysis only)
@(#)Author:         J Leffler
@(#)Copyright:      (C) JLSS 1987-2017
*/

/*TABSTOP=4*/

#include "clnpath.h"

#include "jlss.h"
#include "tokenise.h"
#include <string.h>

/* Capacity of the token array that clnpath2() passes to tokenise(). */
#define MAX_PATH_ELEMENTS   64  /* Number of levels of directory */
/* True when the two C strings compare equal according to strcmp(). */
#define strequal(a,b)       (strcmp((a),(b)) == 0)
/* Number of elements in x; x must be an actual array, not a pointer. */
#define DIM(x)              (sizeof(x)/sizeof(*(x)))

/* Identification string for this source file (skipped when lint is defined). */
#if !defined(lint)
/* Prevent over-aggressive optimizers from eliminating ID string */
extern const char jlss_id_clnpath_c[];
const char jlss_id_clnpath_c[] = "@(#)$Id: clnpath.c,v 2.19 2017/03/26 06:32:49 jleffler Exp $";
#endif /* lint */

/*
** clnpath() - tidy a pathname in place using lexical rules only.
**
** The string at path is rewritten (never lengthened) so that:
**   - runs of adjacent slashes become a single slash;
**   - "." components are dropped ("./xxx" -> "xxx", "xxx/./yyy" -> "xxx/yyy",
**     "xxx/." -> "xxx", "/." -> "/"), while a lone "./" ends up as ".";
**   - a trailing slash is removed, except for the root "/".
** ".." components are left untouched, and the file system is never consulted.
** A path that contains no slash at all is returned unchanged.
*/
void clnpath(char *path)
{
    char *rd = path;            /* Next character to read */
    char *wr = path;            /* Next position to write (never ahead of rd) */
    int   seen_slash = 0;

    /* Pass 1: squeeze each run of slashes down to one slash */
    for (;;)
    {
        char ch = *rd++;
        *wr++ = ch;
        if (ch == '\0')
            break;
        if (ch != '/')
            continue;
        seen_slash = 1;
        while (*rd == '/')
            rd++;
    }

    /* Without a slash there cannot be any "." component to remove */
    if (!seen_slash)
        return;

    /* Pass 2: drop "." components; a leading root slash is stepped over */
    rd = path;
    if (*path == '/')
        rd++;
    wr = rd;

    /* Leading "./" prefixes go, but a bare "./" is kept for now */
    while (rd[0] == '.' && rd[1] == '/' && rd[2] != '\0')
        rd += 2;

    for (;;)
    {
        char ch = *rd++;
        *wr++ = ch;
        if (ch == '\0')
            break;
        if (ch == '/' && rd[0] == '.' && (rd[1] == '/' || rd[1] == '\0'))
        {
            /* Just copied the slash of "/.": skip the dot and un-write the slash */
            rd++;
            wr--;
        }
    }

    /* What is left of "/." or "/./" is simply the root directory */
    if (path[0] == '/' && path[1] == '.')
    {
        if (path[2] == '\0' || (path[2] == '/' && path[3] == '\0'))
            path[1] = '\0';
    }

    /*
    ** wr is one beyond the terminating null, so wr - 2 is the last
    ** character of the string.  At most one trailing slash can remain;
    ** remove it unless it is the first character (the root).
    */
    char *last = wr - 2;
    if (last > path && *last == '/')
        *last = '\0';
}

/*
** clnpath2() is not part of the basic clnpath() function because it can
** change the meaning of a path name if there are symbolic links on the
** system.  For example, suppose /usr/tmp is a symbolic link to /var/tmp.
** If the user supplies /usr/tmp/../abcdef as the directory name, clnpath2()
** would transform that to /usr/abcdef, not to /var/abcdef which is what
** the kernel would interpret it as.
**
** The path is first cleaned with clnpath(), then split into components,
** each "<name>/.." pair is cancelled, any ".." directly after the root is
** dropped, and the remaining components are joined back into path with
** single slashes.  The result is written over the original string; an
** empty result becomes "." and a bare root becomes "/".
**
** NOTE(review): tokenise() is declared in tokenise.h and is not visible
** here.  This code presumably relies on it splitting path in place at each
** '/', storing pointers to the pieces in token[] and returning their count,
** with a leading '/' producing an empty first token - confirm against
** tokenise.h, including what it returns when there are more than
** MAX_PATH_ELEMENTS components.
*/

void clnpath2(char *path)
{
    char *token[MAX_PATH_ELEMENTS];
    int   ntok;

    clnpath(path);

    /* Cancel each "<name>/.." pair */
    if ((ntok = tokenise(path, "/", token, MAX_PATH_ELEMENTS, 0)) > 1)
    {
        for (int i = 0; i < ntok - 1; i++)
        {
            if (!strequal(token[i], "..") && strequal(token[i + 1], ".."))
            {
                /* An empty name is the root; "/.." is dealt with below */
                if (*token[i] == '\0')
                    continue;
                /*
                ** Close the two-slot gap.  Only entries below ntok are
                ** read: token[ntok] is uninitialized, and lies outside
                ** the array when ntok == MAX_PATH_ELEMENTS.
                */
                for (int j = i; j + 2 < ntok; j++)
                    token[j] = token[j + 2];
                ntok -= 2;
                i = -1;     /* Restart enclosing for loop */
            }
        }
    }

    /* Reassemble string */
    char *dst = path;
    if (ntok == 0)
    {
        /* Everything cancelled out of a relative path: current directory */
        *dst++ = '.';
        *dst = '\0';
    }
    else
    {
        if (token[0][0] == '\0')
        {
            /* Absolute path: ".." immediately after the root is discarded */
            int   i;
            for (i = 1; i < ntok && strequal(token[i], ".."); i++)
                ;
            if (i > 1)
            {
                int j;
                for (j = 1; i < ntok; i++)
                    token[j++] = token[i];
                ntok = j;
            }
        }
        if (ntok == 1 && token[0][0] == '\0')
        {
            /* Only the root is left */
            *dst++ = '/';
            *dst = '\0';
        }
        else
        {
            /*
            ** Copy each component followed by a slash; the null copied
            ** with the component is overwritten by that slash, and the
            ** final slash is turned back into the terminator.
            */
            for (int i = 0; i < ntok; i++)
            {
                char *src = token[i];
                while ((*dst++ = *src++) != '\0')
                    ;
                *(dst - 1) = '/';
            }
            *(dst - 1) = '\0';
        }
    }
}

#if defined(TEST)

#include <stdio.h>

#include "phasedtest.h"

/* -- PHASE 1 TESTING -- */

/* -- Phase 1 - Testing clnpath() -- */
typedef struct p1_test_case
{
    const char *input;
    const char *output;
} p1_test_case;

/* This stress tests the cleaning, concentrating on the boundaries. */
static const p1_test_case p1_tests[] =
{
    { "/",                                  "/",            },
    { "//",                                 "/",            },
    { "///",                                "/",            },
    { "/.",                                 "/",            },
    { "/./",                                "/",            },
    { "/./.",                               "/",            },
    { "/././.profile",                      "/.profile",    },
    { "./",                                 ".",            },
    { "./.",                                ".",            },
    { "././",                               ".",            },
    { "./././.profile",                     ".profile",     },
    { "abc/.",                              "abc",          },
    { "abc/./def",                          "abc/def",      },
    { "./abc",                              "abc",          },

    { "//abcd///./abcd////",                "/abcd/abcd",                   },
    { "//abcd///././../defg///ddd//.",      "/abcd/../defg/ddd",            },
    { "/abcd/./../././defg/./././ddd",      "/abcd/../defg/ddd",            },
    { "//abcd//././../defg///ddd//.///",    "/abcd/../defg/ddd",            },

    /* Most of these are minimal interest in phase 1 */
    { "/usr/tmp/clnpath.c",                 "/usr/tmp/clnpath.c",           },
    { "/usr/tmp/",                          "/usr/tmp",                     },
    { "/bin/..",                            "/bin/..",                      },
    { "bin/..",                             "bin/..",                       },
    { "/bin/.",                             "/bin",                         },
    { "sub/directory",                      "sub/directory",                },
    { "sub/directory/file",                 "sub/directory/file",           },
    { "/part1/part2/../.././../",           "/part1/part2/../../..",        },
    { "/.././../usr//.//bin/./cc",          "/../../usr/bin/cc",            },
};

/*
** Run one phase 1 test case: copy the input into a scratch buffer, clean it
** with clnpath(), and report a pass or a failure (with the wanted and actual
** strings) through the phased test reporting functions.
** data points to a p1_test_case.
*/
static void p1_tester(const void *data)
{
    const p1_test_case *tc = (const p1_test_case *)data;
    char  work[256];

    strcpy(work, tc->input);
    clnpath(work);

    if (strcmp(work, tc->output) != 0)
    {
        pt_fail("<<%s>> - unexpected output from clnpath()\n", tc->input);
        pt_info("Wanted <<%s>>\n", tc->output);
        pt_info("Actual <<%s>>\n", work);
        return;
    }

    pt_pass("<<%s>> cleans to <<%s>>\n", tc->input, work);
}

/* -- PHASE 2 TESTING -- */

/* -- Phase 2 - Testing clnpath2() -- */
/* One phase 2 case: a pathname and the string clnpath2() should turn it into. */
typedef struct p2_test_case
{
    const char *input;      /* Pathname copied into a buffer and cleaned */
    const char *output;     /* Expected buffer contents after clnpath2() */
} p2_test_case;

/*
** Expected results for clnpath2(): "<name>/.." pairs cancel, ".." directly
** after the root is dropped, leading ".." in a relative path is kept, and
** a relative path that cancels completely becomes ".".
*/
static const p2_test_case p2_tests[] =
{
    { "/abcd/../defg/ddd",              "/defg/ddd"         },
    { "/bin/..",                        "/"                 },
    { "bin/..",                         "."                 },
    { "/usr/bin/..",                    "/usr"              },
    { "/usr/bin/../..",                 "/"                 },
    { "usr/bin/../..",                  "."                 },
    { "../part/of/../the/way",          "../part/the/way"   },
    { "/../part/of/../the/way",         "/part/the/way"     },
    { "part1/part2/../../part3",        "part3"             },
    { "part1/part2/../../../part3",     "../part3"          },
    { "/part1/part2/../../../part3",    "/part3"            },
    { "/part1/part2/../../../",         "/"                 },
    { "/../../usr/bin/cc",              "/usr/bin/cc"       },
    { "../../usr/bin/cc",               "../../usr/bin/cc"  },
    { "part1/./part2/../../part3",      "part3"             },
    { "./part1/part2/../../../part3",   "../part3"          },
    { "/part1/part2/.././../../part3",  "/part3"            },
    { "/part1/part2/../.././../",       "/"                 },
    { "/.././..//./usr///bin/cc/",      "/usr/bin/cc"       },
};

/*
** Run one phase 2 test case: copy the input into a scratch buffer, clean it
** with clnpath2(), and report a pass or a failure (with the wanted and
** actual strings) through the phased test reporting functions.
** data points to a p2_test_case.
*/
static void p2_tester(const void *data)
{
    const p2_test_case *tc = (const p2_test_case *)data;
    char  work[256];

    strcpy(work, tc->input);
    clnpath2(work);

    if (strcmp(work, tc->output) != 0)
    {
        pt_fail("<<%s>> - unexpected output from clnpath2()\n", tc->input);
        pt_info("Wanted <<%s>>\n", tc->output);
        pt_info("Actual <<%s>>\n", work);
        return;
    }

    pt_pass("<<%s>> cleans to <<%s>>\n", tc->input, work);
}

/* -- Phased Test Infrastructure -- */

/*
** Table of test phases passed to pt_auto_harness() by main().  Each entry
** names a tester function, the test case array for that phase (through
** PT_ARRAYINFO) and a title string.
** NOTE(review): pt_auto_phase and PT_ARRAYINFO come from phasedtest.h, which
** is not visible here; confirm there what the 0 field means.
*/
static pt_auto_phase phases[] =
{
    { p1_tester, PT_ARRAYINFO(p1_tests), 0, "Phase 1 - Testing clnpath()" },
    { p2_tester, PT_ARRAYINFO(p2_tests), 0, "Phase 2 - Testing clnpath2()" },
};

/*
** Test driver: hands the command line and the phases table to
** pt_auto_harness() and returns its result as the exit status.
**
** The "#if 0" section is an optional interactive mode that reads pathnames
** from standard input and prints the result of clnpath() followed by
** clnpath2().  It is disabled by default; it declares its own buffer so
** that it compiles when enabled.
*/
int main(int argc, char **argv)
{

#if 0
    /* Interactive testing */
    char buffer[256];

    printf("Enter pathname: ");
    while (fgets(buffer, sizeof(buffer), stdin) != NULL)
    {
        /* Zap newline if present; safe for empty or over-long input */
        buffer[strcspn(buffer, "\n")] = '\0';
        printf("Unclean: <<%s>>\n", buffer);
        clnpath(buffer);
        printf("Clean 1: <<%s>>\n", buffer);
        clnpath2(buffer);
        printf("Clean 2: <<%s>>\n", buffer);
        printf("Enter pathname: ");
    }
    putchar('\n');
#endif /* 0 */

    return(pt_auto_harness(argc, argv, phases, DIM(phases)));
}

#endif /* TEST */

第二个变体使用了一个函数tokenise(),该函数未包含在上面的源代码中。如果需要的话可以提供。

代码包含测试示例,但它使用了一个名为“Phased Test”的库,我尚未正式发布该库,因为我有一些打包问题需要解决。如果认为需要,可以在短时间内提供。

关于c - 有没有办法规范化不存在文件的文件路径?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57418213/

相关文章:

c - ANSI C - 变量范围

c - 如何在字符串数组中为某个新字符串动态分配内存并将该字符串复制到 C 中动态分配的空间中?

python - 未知的 Python 错误

java - 文件和目录例程

java - 获取路径/文件名的路径,但文件没有扩展名

node.js - Express 服务器不提供 Index.html 中的静态文件

c - 访问部分分配的数组是否定义明确,超过分配的部分?

我可以使用 while(strstr(name[a],sname)!=NULL)

C - 我试图在递归函数中使用 fprintf 但它只打印最少的行

c# - ASP.NET Web 应用程序文件路径(在 Azure 上发布)