# hachoir_parser/misc/word_doc.py

"""
Documents:

* libwx source code: see fib.c source code
* "Microsoft Word 97 Binary File Format"
   http://bio.gsi.de/DOCS/AIX/wword8.html

   Microsoft Word 97 (aka Version 8) for Windows and Macintosh. From the Office
   book, found in the Microsoft Office Development section in the MSDN Online
   Library. HTMLified June 1998. Revised Aug 1 1998, added missing Definitions
   section. Revised Dec 21 1998, added missing Document Properties (section).
"""

from hachoir_parser import Parser
from hachoir_core.field import (FieldSet,
    Bit, Bits,
    UInt8, Int16, UInt16, UInt32, Int32,
    NullBytes, RawBytes, PascalString16,
    DateTimeMSDOS32)
from hachoir_core.endian import LITTLE_ENDIAN

# Field type used for the timestamp fields of the header
# ("lProductRevised" and "ftModified" in BaseWordDocument.createFields).
TIMESTAMP = DateTimeMSDOS32

class BaseWordDocument:
    """
    Field layout shared by WordDocumentFieldSet and WordDocumentParser.

    The class only provides createFields(); it reads self.size and
    self.current_size, so it has to be combined with a base class
    (FieldSet or Parser) that supplies them.
    """

    def createFields(self):
        """
        Yield every header field in file order, then the unparsed remainder.

        A large part of the header is a run of ("fc" + X, "lcb" + X) pairs,
        each read as an Int32 followed by a UInt32.  Those pairs are
        generated from tuples of name suffixes; the few fields that break
        the pattern are yielded explicitly between the loops.
        """
        # Leading 16-bit words ("wIdent" keeps its extra positional argument).
        yield UInt16(self, "wIdent", 2)
        for word_name in ("nFib", "nProduct", "lid"):
            yield UInt16(self, word_name)
        yield Int16(self, "pnNext")

        # First flag group: 4 single bits, one 4-bit value, 8 single bits.
        for flag_name in ("fDot", "fGlsy", "fComplex", "fHasPic"):
            yield Bit(self, flag_name)
        yield Bits(self, "cQuickSaves", 4)
        for flag_name in (
                "fEncrypted", "fWhichTblStm", "fReadOnlyRecommanded",
                "fWriteReservation", "fExtChar", "fLoadOverride",
                "fFarEeast", "fCrypto"):
            yield Bit(self, flag_name)

        yield UInt16(self, "nFibBack")
        yield UInt32(self, "lKey")
        yield UInt8(self, "envr")

        # Second flag group: 5 single bits followed by 3 spare bits.
        for flag_name in (
                "fMac", "fEmptySpecial", "fLoadOverridePage",
                "fFutureSavedUndo", "fWord97Save"):
            yield Bit(self, flag_name)
        yield Bits(self, "fSpare0", 3)

        yield UInt16(self, "chse")
        yield UInt16(self, "chsTables")
        yield UInt32(self, "fcMin")
        yield UInt32(self, "fcMac")

        yield PascalString16(self, "file_creator", strip="\0")

        yield NullBytes(self, "reserved[]", 12)

        yield Int16(self, "lidFE")
        yield UInt16(self, "clw")
        yield Int32(self, "cbMac")
        yield UInt32(self, "lProductCreated")
        yield TIMESTAMP(self, "lProductRevised")

        # "ccpText" is the only unsigned member of this run; every
        # following name up to "fcIslandLim" is read as Int32.
        yield UInt32(self, "ccpText")
        for long_name in (
                "ccpFtn", "ccpHdr", "ccpMcr", "ccpAtn", "ccpEdn",
                "ccpTxbx", "ccpHdrTxbx",
                "pnFbpChpFirst", "pnChpFirst", "cpnBteChp",
                "pnFbpPapFirst", "pnPapFirst", "cpnBtePap",
                "pnFbpLvcFirst", "pnLvcFirst", "cpnBteLvc",
                "fcIslandFirst", "fcIslandLim"):
            yield Int32(self, long_name)
        yield UInt16(self, "cfclcb")

        # (fc, lcb) pairs, first run.
        for suffix in (
                "StshfOrig", "Stshf",
                "PlcffndRef", "PlcffndTxt", "PlcfandRef", "PlcfandTxt",
                "Plcfsed", "Plcpad", "Plcfphe", "Sttbfglsy", "Plcfglsy",
                "Plcfhdd", "PlcfbteChpx", "PlcfbtePapx", "Plcfsea",
                "Sttbfffn", "PlcffldMom", "PlcffldHdr", "PlcffldFtn",
                "PlcffldAtn", "PlcffldMcr", "Sttbfbkmk", "Plcfbkf",
                "Plcfbkl", "Cmds", "Plcmcr", "Sttbfmcr", "PrDrvr",
                "PrEnvPort", "PrEnvLand", "Wss", "Dop", "SttbfAssoc",
                "Clx", "PlcfpgdFtn", "AutosaveSource", "GrpXstAtnOwners",
                "SttbfAtnbkmk", "PlcdoaMom", "PlcdoaHdr", "PlcspaMom",
                "PlcspaHdr", "PlcfAtnbkf", "PlcfAtnbkl", "Pms",
                "FormFldSttbs", "PlcfendRef", "PlcfendTxt", "PlcffldEdn",
                "PlcfpgdEdn", "DggInfo", "SttbfRMark", "SttbCaption",
                "SttbAutoCaption", "Plcfwkb", "Plcfspl", "PlcftxbxTxt",
                "PlcffldTxbx", "PlcfhdrtxbxTxt", "PlcffldHdrTxbx",
                "StwUser"):
            yield Int32(self, "fc" + suffix)
            yield UInt32(self, "lcb" + suffix)

        # Irregular pair: the second field is named "cb...", not "lcb...".
        yield Int32(self, "fcSttbttmbd")
        yield UInt32(self, "cbSttbttmbd")

        # (fc, lcb) pairs, second run.
        for suffix in (
                "Unused", "PgdMother", "BkdMother", "PgdFtn", "BkdFtn",
                "PgdEdn", "BkdEdn", "SttbfIntlFld", "RouteSlip",
                "SttbSavedBy", "SttbFnm", "PlcfLst", "PlfLfo",
                "PlcftxbxBkd", "PlcftxbxHdrBkd", "DocUndo", "Rgbuse",
                "Usp", "Uskf", "PlcupcRgbuse", "PlcupcUsp",
                "SttbGlsyStyle", "Plgosl", "Plcocx", "PlcfbteLvc"):
            yield Int32(self, "fc" + suffix)
            yield UInt32(self, "lcb" + suffix)

        # NOTE(review): read as a single TIMESTAMP field; confirm that
        # width against the FIB specification.
        yield TIMESTAMP(self, "ftModified")

        # (fc, lcb) pairs, last run.
        for suffix in (
                "Plcflvc", "Plcasumy", "Plcfgram", "SttbListNames",
                "SttbfUssr"):
            yield Int32(self, "fc" + suffix)
            yield UInt32(self, "lcb" + suffix)

        # Whatever has not been consumed is exposed as one raw field; the
        # difference is divided by 8 before being used as the RawBytes
        # length (presumably a bit count converted to bytes).
        leftover = (self.size - self.current_size) // 8
        if leftover:
            yield RawBytes(self, "tail", leftover)

class WordDocumentFieldSet(BaseWordDocument, FieldSet):
    """
    FieldSet flavour of the Word document header.

    Adds nothing of its own: createFields() comes from BaseWordDocument
    and everything else from FieldSet.
    """

class WordDocumentParser(BaseWordDocument, Parser):
    """
    Parser flavour of the Word document header.

    createFields() comes from BaseWordDocument; this class only supplies
    the parser tags, the byte order and the Parser entry points.
    """
    endian = LITTLE_ENDIAN
    PARSER_TAGS = dict(
        id="word_document",
        min_size=8,
        description="Microsoft Office Word document"
    )

    def validate(self):
        """Return True unconditionally: no check is made on the stream."""
        return True

    def __init__(self, stream, **kw):
        """Pass the stream and any keyword options straight to Parser."""
        Parser.__init__(self, stream, **kw)
# hachoir-parser documentation built on Sept. 20, 2017, 5:30 p.m.