我在为“阅读 PDF”开发 iPhone 应用程序时遇到了一些问题。 我试过下面的代码。我知道我使用了错误的解析方法 - 解析方法仅用于搜索目的。但我想将整个 pdf 文本转换为字符串。比如说 Apple 的 MobileHIG.pdf - 我在这段代码中使用过。
@implementation NetPDFViewController
size_t totalPages; // a variable to store total pages
// a method to get the pdf ref
CGPDFDocumentRef MyGetPDFDocumentRef (const char *filename) {
CFStringRef path;
CFURLRef url;
CGPDFDocumentRef document;
path = CFStringCreateWithCString (NULL, filename,kCFStringEncodingUTF8);
url = CFURLCreateWithFileSystemPath (NULL, path, kCFURLPOSIXPathStyle, 0);
CFRelease (path);
document = CGPDFDocumentCreateWithURL (url);// 2
CFRelease(url);
int count = CGPDFDocumentGetNumberOfPages (document);// 3
if (count == 0) {
printf("`%s' needs at least one page!", filename);
return NULL;
}
return document;
}
// table methods to parse pdf
static void op_MP (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("MP /%s\n", name);
}
static void op_DP (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("DP /%s\n", name);
}
static void op_BMC (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("BMC /%s\n", name);
}
static void op_BDC (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("BDC /%s\n", name);
}
static void op_EMC (CGPDFScannerRef s, void *info) {
const char *name;
if (!CGPDFScannerPopName(s, &name))
return;
printf("EMC /%s\n", name);
}
// a method to display pdf page.
void MyDisplayPDFPage (CGContextRef myContext,size_t pageNumber,const char *filename) {
CGPDFDocumentRef document;
CGPDFPageRef page;
document = MyGetPDFDocumentRef (filename);// 1
totalPages=CGPDFDocumentGetNumberOfPages(document);
page = CGPDFDocumentGetPage (document, pageNumber);// 2
CGPDFDictionaryRef d;
d = CGPDFPageGetDictionary(page);
// ----- edit problem here - CGPDFDictionary is completely unknown
// ----- as we don't know keys & values of it.
CGPDFScannerRef myScanner;
CGPDFOperatorTableRef myTable;
myTable = CGPDFOperatorTableCreate();
CGPDFOperatorTableSetCallback (myTable, "MP", &op_MP);
CGPDFOperatorTableSetCallback (myTable, "DP", &op_DP);
CGPDFOperatorTableSetCallback (myTable, "BMC", &op_BMC);
CGPDFOperatorTableSetCallback (myTable, "BDC", &op_BDC);
CGPDFOperatorTableSetCallback (myTable, "EMC", &op_EMC);
CGPDFContentStreamRef myContentStream = CGPDFContentStreamCreateWithPage (page);// 3
myScanner = CGPDFScannerCreate (myContentStream, myTable, NULL);// 4
CGPDFScannerScan (myScanner);// 5
// CGPDFDictionaryRef d;
CGPDFStringRef str; // represents a sequence of bytes
d = CGPDFPageGetDictionary(page);
if (CGPDFDictionaryGetString(d, "Thumb", &str)){
CFStringRef s;
s = CGPDFStringCopyTextString(str);
if (s != NULL) {
//need something in here in case it cant find anything
NSLog(@"%@ testing it", s);
}
CFRelease(s);
// CFDataRef data = CGPDFStreamCopyData (stream, CGPDFDataFormatRaw);
}
// -----------------------------------
CGContextDrawPDFPage (myContext, page);// 3
CGContextTranslateCTM(myContext, 0, 20);
CGContextScaleCTM(myContext, 1.0, -1.0);
CGPDFDocumentRelease (document);// 4
}
- (void)viewDidLoad {
[super viewDidLoad];
// --------------------------------------------------------
// code for simple direct image from pdf docs.
UIGraphicsBeginImageContext(CGSizeMake(320, 460));
initialPage=28;
MyDisplayPDFPage(UIGraphicsGetCurrentContext(), initialPage, [[[NSBundle mainBundle] pathForResource:@"MobileHIG" ofType:@"pdf"] UTF8String]);
imgV.image=UIGraphicsGetImageFromCurrentImageContext();
imgV.image=[imgV.image rotate:UIImageOrientationDownMirrored];
}
- (void)touchesBegan:(NSSet *)touches withEvent:(UIEvent *)event{
UITouch *touch = [touches anyObject];
CGPoint LasttouchPoint = [touch locationInView:self.view];
int LasttouchX = LasttouchPoint.x;
startpoint=LasttouchX;
}
- (void)touchesMoved:(NSSet *)touches withEvent:(UIEvent *)event{
}
- (void)touchesEnded:(NSSet *)touches withEvent:(UIEvent *)event{
UITouch *touch = [touches anyObject];
CGPoint LasttouchPoint = [touch locationInView:self.view];
int LasttouchX = LasttouchPoint.x;
endpoint=LasttouchX;
if(startpoint>(endpoint+75)){
initialPage++;
[self loadPage:initialPage nextOne:YES];
} else if((startpoint+75)<endpoint){
initialPage--;
[self loadPage:initialPage nextOne:NO];
}
}
-(void)loadPage:(NSUInteger)page nextOne:(BOOL)yesOrNo{
if(page<=totalPages && page>0){
UIGraphicsBeginImageContext(CGSizeMake(720, 720));
MyDisplayPDFPage(UIGraphicsGetCurrentContext(), page, [[[NSBundle mainBundle] pathForResource:@"MobileHIG" ofType:@"pdf"] UTF8String]);
CATransition *transition = [CATransition animation];
transition.duration = 0.75;
transition.timingFunction = [CAMediaTimingFunction functionWithName:kCAMediaTimingFunctionEaseInEaseOut];
transition.type=kCATransitionPush;
if(yesOrNo){
transition.subtype=kCATransitionFromRight;
} else {
transition.subtype=kCATransitionFromLeft;
}
transition.delegate = self;
[imgV.layer addAnimation:transition forKey:nil];
imgV.image=UIGraphicsGetImageFromCurrentImageContext();
imgV.image=[imgV.image rotate:UIImageOrientationDownMirrored];
}
}
但我连 pdf 文档中的一行都没有读懂。 还缺少什么?
最佳答案
如果您想从 pdf 文件中提取一些内容,那么您可能需要阅读以下内容:
来自 Quartz 2D 编程指南。
基本上,您将使用 CGPDFScanner
对象来解析内容,其工作方式如下。您注册了一些回调,当在 pdf 流中遇到一些 pdf 运算符时,Quartz 2D 将自动调用这些回调。在这个初始步骤之后,您实际上开始解析 pdf 流。
简要查看您的代码,您似乎没有按照解析通过 CGPDFDocumentGetPage()
获得的页面的 pdf 内容所需的步骤进行操作.您首先需要使用 CGPDFOperatorTableCreate()
设置回调和 CGPDFOperatorTableSetCallback()
, 然后你得到页面,你需要使用该页面创建一个内容流(使用 CGPDFContentStreamCreateWithPage()
)然后实例化一个 CGPDFScanner
通过CGPDFScannerCreate()
并实际开始扫描 CGPDFScannerScan()
.
上述 URL 指出的文档的“解析 PDF 内容”部分为您提供了实现 pdf 解析所需的所有信息。
希望这对您有所帮助。
关于iphone - 通过 iPhone 应用程序以字符串形式读取 PDF 文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/2362393/