Linux Openoffice转换Office为pdf

1、将下面的代码存储为 DocumentConverter.py

2、将该文件拷贝到 $OPENOFFICE/program 中($OPENOFFICE为主目录)

3、进入到program目录后,启动OPENOFFICE服务,启动服务命令如下:

./soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard &

4、执行如下命令进行文档转换:


./python DocumentConverter.py /opt/shanhy/test.doc /opt/shanhy/test.pdf
#第一个参数为要转换的doc文件,第二个参数为转换后的pdf文件位置

5、同样的命令也可以转换其他格式文件,大家查看下面的脚本代码研究研究。

比如doc直接转换为html:

./python DocumentConverter.py /opt/shanhy/test.doc /opt/shanhy/test.html

DocumentConverter.py 脚本源码如下:

#
# PyODConverter (Python OpenDocument Converter) v1.1 - 2009-11-14
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2008-2009 Mirko Nasato <mirko@artofsolving.com>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
#
# Default TCP port used to reach the OpenOffice.org UNO socket listener; it is
# the default value of the `port` argument of DocumentConverter.
DEFAULT_OPENOFFICE_PORT = 8100

import uno
from os.path import abspath, isfile, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException

# Document family names. DocumentConverter._detectFamily returns one of these,
# and they are the keys of the per-format dictionaries in EXPORT_FILTER_MAP and
# of PAGE_STYLE_OVERRIDE_PROPERTIES.
FAMILY_TEXT = "Text"
FAMILY_WEB = "Web"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

#---------------------#
# Configuration Start #
#---------------------#

# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter

# most formats are auto-detected; only those requiring options are defined here
# Maps a lower-cased input file extension to extra load properties. When the
# input extension is present here, DocumentConverter.convert merges the entry
# into the properties passed to loadComponentFromURL.
IMPORT_FILTER_MAP = {
    "txt": {
        "FilterName": "Text (encoded)",
        "FilterOptions": "utf8"
    },
    "csv": {
        "FilterName": "Text - txt - csv (StarCalc)",
        # NOTE(review): presumably field separator ',' (ASCII 44), text
        # delimiter '"' (ASCII 34) and character set 0 -- confirm against the
        # OpenOffice filter options documentation.
        "FilterOptions": "44,34,0"
    }
}

# Maps a lower-cased output file extension to a dictionary keyed by document
# family; each value holds the store properties (export filter name and
# optional filter options) passed to storeToURL.
# An extension missing here is reported as "unknown output format"; a family
# missing under a known extension is reported as "unsupported conversion".
EXPORT_FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: { "FilterName": "writer_pdf_Export" },
        FAMILY_WEB: { "FilterName": "writer_web_pdf_Export" },
        FAMILY_SPREADSHEET: { "FilterName": "calc_pdf_Export" },
        FAMILY_PRESENTATION: { "FilterName": "impress_pdf_Export" },
        FAMILY_DRAWING: { "FilterName": "draw_pdf_Export" }
    },
    "html": {
        FAMILY_TEXT: { "FilterName": "HTML (StarWriter)" },
        FAMILY_SPREADSHEET: { "FilterName": "HTML (StarCalc)" },
        FAMILY_PRESENTATION: { "FilterName": "impress_html_Export" }
    },
    "odt": {
        FAMILY_TEXT: { "FilterName": "writer8" },
        FAMILY_WEB: { "FilterName": "writerweb8_writer" }
    },
    "doc": {
        FAMILY_TEXT: { "FilterName": "MS Word 97" }
    },
    "rtf": {
        FAMILY_TEXT: { "FilterName": "Rich Text Format" }
    },
    "txt": {
        FAMILY_TEXT: {
            "FilterName": "Text",
            "FilterOptions": "utf8"
        }
    },
    "ods": {
        FAMILY_SPREADSHEET: { "FilterName": "calc8" }
    },
    "xls": {
        FAMILY_SPREADSHEET: { "FilterName": "MS Excel 97" }
    },
    "csv": {
        FAMILY_SPREADSHEET: {
            "FilterName": "Text - txt - csv (StarCalc)",
            # NOTE(review): same option string as the csv import filter;
            # presumably separator ',', delimiter '"', charset 0 -- confirm.
            "FilterOptions": "44,34,0"
        }
    },
    "odp": {
        FAMILY_PRESENTATION: { "FilterName": "impress8" }
    },
    "ppt": {
        FAMILY_PRESENTATION: { "FilterName": "MS PowerPoint 97" }
    },
    "swf": {
        FAMILY_DRAWING: { "FilterName": "draw_flash_Export" },
        FAMILY_PRESENTATION: { "FilterName": "impress_flash_Export" }
    }
}

# Per-family page style overrides. For a document whose family is a key here,
# DocumentConverter._overridePageStyleProperties sets every listed property on
# every page style of the document before it is exported.
PAGE_STYLE_OVERRIDE_PROPERTIES = {
    FAMILY_SPREADSHEET: {
        #--- Scale options: uncomment 1 of the 3 ---
        # a) 'Reduce / enlarge printout': 'Scaling factor'
        "PageScale": 100,
        # b) 'Fit print range(s) to width / height': 'Width in pages' and 'Height in pages'
        #"ScaleToPagesX": 1, "ScaleToPagesY": 1000,
        # c) 'Fit print range(s) on number of pages': 'Fit print range(s) on number of pages'
        #"ScaleToPages": 1,
        # do not print the cell grid lines
        "PrintGrid": False
    }
}

#-------------------#
# Configuration End #
#-------------------#

class DocumentConversionException(Exception):
    """Raised when a document cannot be converted.

    The human-readable reason is available both as ``str(exception)`` and as
    the ``message`` attribute.
    """

    def __init__(self, message):
        # Initialise the base class so that ``args`` (and therefore repr(),
        # pickling and logging) carry the message on every Python version.
        super(DocumentConversionException, self).__init__(message)
        self.message = message

    def __str__(self):
        return self.message

class DocumentConverter:
    """Convert documents between office formats via a running OpenOffice.org.

    The constructor connects to an OpenOffice.org instance listening on a
    local socket; ``convert`` then loads an input file and stores it in the
    format implied by the output file's extension.

    The code uses only syntax that is valid on both Python 2 and Python 3.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to OpenOffice.org on ``localhost`` at the given port.

        Raises:
            DocumentConversionException: if no connection can be established.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve("uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException("failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Convert ``inputFile`` to ``outputFile``.

        The output format is chosen from the extension of ``outputFile``.

        Raises:
            DocumentConversionException: if the input cannot be loaded, the
                output extension is unknown, or the document family cannot
                be exported to the requested format.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        loadProperties = {"Hidden": True}
        inputExt = self._getFileExt(inputFile)
        if inputExt in IMPORT_FILTER_MAP:
            loadProperties.update(IMPORT_FILTER_MAP[inputExt])

        document = self.desktop.loadComponentFromURL(inputUrl, "_blank", 0, self._toProperties(loadProperties))
        if document is None:
            # Fail with a meaningful error instead of an AttributeError on
            # None further down.
            raise DocumentConversionException("failed to load input file: %s" % inputFile)

        # Everything after a successful load runs under try/finally so the
        # document is closed even when family detection or filter lookup
        # fails; otherwise it would stay open in the OpenOffice.org process.
        try:
            try:
                document.refresh()
            except AttributeError:
                # presumably not every document type exposes refresh()
                pass

            family = self._detectFamily(document)
            self._overridePageStyleProperties(document, family)

            outputExt = self._getFileExt(outputFile)
            storeProperties = self._getStoreProperties(document, outputExt)

            document.storeToURL(outputUrl, self._toProperties(storeProperties))
        finally:
            document.close(True)

    def _overridePageStyleProperties(self, document, family):
        """Apply PAGE_STYLE_OVERRIDE_PROPERTIES[family] to every page style."""
        if family not in PAGE_STYLE_OVERRIDE_PROPERTIES:
            return
        properties = PAGE_STYLE_OVERRIDE_PROPERTIES[family]
        pageStyles = document.getStyleFamilies().getByName('PageStyles')
        for styleName in pageStyles.getElementNames():
            pageStyle = pageStyles.getByName(styleName)
            for name, value in properties.items():
                pageStyle.setPropertyValue(name, value)

    def _getStoreProperties(self, document, outputExt):
        """Return the store properties for exporting ``document`` as ``outputExt``.

        Raises:
            DocumentConversionException: if ``outputExt`` is not in
                EXPORT_FILTER_MAP, or the document's family has no filter
                for that extension.
        """
        family = self._detectFamily(document)
        try:
            propertiesByFamily = EXPORT_FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException("unknown output format: '%s'" % outputExt)
        try:
            return propertiesByFamily[family]
        except KeyError:
            raise DocumentConversionException("unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services ``document`` supports.

        Raises:
            DocumentConversionException: if no known service is supported.
        """
        # WebDocument is tested before GenericTextDocument on purpose: the
        # generic check is only meant to catch TextDocument / GlobalDocument.
        if document.supportsService("com.sun.star.text.WebDocument"):
            return FAMILY_WEB
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # must be TextDocument or GlobalDocument
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of ``path`` without the dot.

        Returns an empty string when ``path`` has no extension.
        """
        # splitext always returns a string ("" or ".ext"), so no None check
        # is needed; slicing off the first character drops the dot.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of ``path``."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, dict):
        """Convert a mapping into a tuple of UNO ``PropertyValue`` objects."""
        props = []
        for key, value in dict.items():
            prop = PropertyValue()
            prop.Name = key
            prop.Value = value
            props.append(prop)
        return tuple(props)

def main(argv):
    """Run the command-line converter and return the process exit status.

    Args:
        argv: the full argument vector; ``argv[0]`` is the program name,
            ``argv[1]`` the input file and ``argv[2]`` the output file.

    Returns:
        0 on success, 255 when too few arguments are given, 1 when the input
        file does not exist or the conversion fails.
    """
    # print(...) with a single pre-formatted string behaves the same on
    # Python 2 and Python 3.
    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        return 255
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        return 1

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        return 1
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        return 1
    return 0


if __name__ == "__main__":
    from sys import argv, exit

    exit(main(argv))
时间: 2024-12-06 18:59:04

Linux Openoffice转换Office为pdf的相关文章

应用-Office转PDF后台转换不了

问题描述 Office转PDF后台转换不了 需要做一个在线浏览office文件的功能,思路是先将Office文件转换为PDF,再由PDF生成swf flash文件,再由 FlexPaper 在线播放swf文件。本地用VS2010调试一切OK,所以代码本身没问题。上传IIS服务器就不行了,服务器上: 发现PDF生成swf是没有问题的,所以文件夹的访问权限肯定没问题。但是office生成PDF就死活出不来。本地电脑用IIS发布,用浏览器直接访问的话,问题和在服务器发布后效果一样,office转pdf不

使用jacob调用Windows的com对象,转换Office文件为pdf、html等

1.介绍      Jacob 是Java-COM Bridge的缩写,它在Java与微软的COM组件之间构建一座桥梁.使用Jacob自带的DLL动态链接库,并通过JNI的方式实现了在Java平台上对COM程序的调用.至于什么是COM组件,大家自己Google吧. 2.安装和配置     Jacob是一个开源软件,它的官方站点是:http://danadler.com/jacob/ 大家可以到上面下载源代码研究,也可以直接下载编译后的二进制文件.     下载包jacob_x.x.zip,解压后

万能迅捷PDF转换成Word软件全能版 Office转PDF

日前,最新发布的迅捷PDF转换成Word转换器在原有基础上,通过增强型的转换功能集成,已经不再仅仅局限于普通的PDF转换成Word的基础功能,而是成为一款具备全面覆盖PDF与多种不同文件格式的互相转换,最大限度地满足了不同用户的需求,成为目前下载和使用量最大的转换工具之一. 作为国内知名的文件格式转换大师,迅捷PDF转换成Word转换器较之传统的PDF转换工具具备极大的不同.普通的PDF转换成Word转换器软件功能核心单一,转换质量也较差,同时转换效率方面也很难满足广大用户的需求.实际上,很多用

pdf-C#下通过OpenOffice转换PDF出错

问题描述 C#下通过OpenOffice转换PDF出错 参照niuhea的<通过OpenOffice转换PDF>,用C#编写如下代码: using System; using System.Collections.Generic; using System.Linq; using System.Web; using System.Web.UI; using System.Web.UI.WebControls; using com.artofsolving.jodconverter; using

c#下通过OpenOffice转换PDF出错

问题描述 参照niuhea的[<通过OpenOffice转换PDF>](http://blog.csdn.net/niuhea/article/details/7749684""),用C#编写如下代码:```usingSystem;usingSystem.Collections.Generic;usingSystem.Linq;usingSystem.Web;usingSystem.Web.UI;usingSystem.Web.UI.WebControls;usingcom

后台用java实现的在线阅读文档,支持office、pdf,类似百度文库,求解决方案!

问题描述 后台用java实现的在线阅读文档,支持office.pdf,类似百度文库,求解决方案! 目前上传的文档只能下载下来后,用户通过本地软件打开.不能通过网页直接浏览,无需下载到本地. 解决方案 参考 save4me 说的那个文章,现在实现在线浏览都是最终转化为swf文件,通过FlexPaper来播放swf文件实现的.那个文档是这样的,比如一个word文档:上传一个word文档,通过OpenOffice转换成pdf文件,再用swftools把pdf文件转换成swf文件.如果上传的是pdf文件

docx转pdf-利用jodconverter+openoffice转换docx、xlsx不成功!求大神帮忙!跪谢啦!

问题描述 利用jodconverter+openoffice转换docx.xlsx不成功!求大神帮忙!跪谢啦! 用的是jodconverter-core-3.0-beta-4-dist里的jar包: OpenOffice 用的4.1.1的版本: 调用代码书写如下: public static boolean startService(String wordFile,String pdfFile,String fileswf){ boolean flag = false; DefaultOffic

wkhtmltopdf:一个Linux中将网页转成PDF的智能工具

wkhtmltopdf 是一个开源.简单而有效的命令行 shell 程序,它可以将任何 HTML (网页)转换为 PDF 文档或图像(jpg.png 等). wkhtmltopdf 是用 C++ 编写的,并在 GNU/GPL (通用公共许可证)下发布.它使用 WebKit 渲染引擎将 HTML 页面转换为 PDF 文档且不会丢失页面的质量.这是一个用于实时创建和存储网页快照的非常有用且可信赖的解决方案. wkhtmltopdf 的功能 开源并且跨平台. 使用 WebKit 引擎将任意 HTML

wkhtmltopdf:一个 Linux 中将网页转成 PDF 的智能工具

wkhtmltopdf 是一个开源.简单而有效的命令行 shell 程序,它可以将任何 HTML (网页)转换为 PDF 文档或图像(jpg.png 等). wkhtmltopdf 是用 C++ 编写的,并在 GNU/GPL (通用公共许可证)下发布.它使用 WebKit 渲染引擎将 HTML 页面转换为 PDF 文档且不会丢失页面的质量.这是一个用于实时创建和存储网页快照的非常有用且可信赖的解决方案. wkhtmltopdf 的功能 开源并且跨平台. 使用 WebKit 引擎将任意 HTML