Quantcast
Channel: VBForums - CodeBank - Visual Basic 6 and earlier
Viewing all articles
Browse latest Browse all 1449

MS-OXRTFCP Compressed RTF Decompression Methods

$
0
0
I had a need to decompress MS-OXRTFCP Compressed RTF data that is used by Outlook messages. The format is described here in great detail: https://interoperability.blob.core.w...OXRTFCP%5d.pdf

I wasn't enjoying the prospect of turning that spec into code, so I did a quick search to see if it had already been tackled. I found this VB.net code by a user named "ForumAccount" here at vbforums: https://www.vbforums.com/showthread....xchange-Server

After a bit of struggling with the ShiftLeft/Right stuff, I plugged in some work by Jost Schwider that I found over at VBspeed: http://www.xbeat.net/vbspeed/c_Shift...tm#ShiftLeft06 and http://www.xbeat.net/vbspeed/c_Shift...m#ShiftLeftZ08

After a bit of cleanup and formatting to my preferred style, everything was working nicely so I thought I'd share it with you all in case you ever have a need for it.

There are 2 public methods that should be self-explanatory: DecompressRtfFile and DecompressRtfBytes.

Note that I use RC6 for a couple of lines because the cFSO and cArrayList classes are just too darned handy. It should be pretty straight forward to swap the RC6 code out for vanilla VB6 if you are so inclined.


Code:

Option Explicit

' This code has been adapted from a VB.net by a user named "ForumAccount": found here: https://www.vbforums.com/showthread.php?669883-NET-3-5-RtfDecompressor-Decompress-RTF-From-Outlook-And-Exchange-Server
' That code was apparently written based on the spec found here: https://docs.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxrtfcp/65dfe2df-1b69-43fc-8ebd-21819a7463fb

Private Const mc_CircularDictionaryMaxLength As Long = &H1000&
Private Const mc_RtfHeaderLength As Long = 16

Private Enum e_CompressedRtfType
  rtf_Compressed = &H75465A4C  ' Magic number for uncompressed RTF. Reportedly very rare in the wild.
  rtf_Uncompressed = &H414C454D ' Magic number for compressed RTF used by Outlook message storage.
End Enum

Private Type RtfControl
  Flags(0 To 7) As Boolean  ' "Bit" flags
  Length As Long
End Type

Private Type CompressedRtfHeader
  CompressedSize As Long
  UncompressedSize As Long
  CompressionType As e_CompressedRtfType
  Crc As Long
End Type

Private mo_Crc As rc6.cArrayList
Private mo_Dictionary As rc6.cArrayList

Public Function DecompressRtfFile(ByVal p_CompressedRtfFilePath As String, Optional ByVal p_CheckCrc As Boolean = True) As String
  DecompressRtfFile = DecompressRtfBytes(New_c.FSO.ReadByteContent(p_CompressedRtfFilePath), p_CheckCrc)
End Function

Public Function DecompressRtfBytes(pa_CompressedRtfBytes() As Byte, Optional ByVal p_CheckCrc As Boolean = True) As String
  Dim l_Word As Long
  Dim l_Upper As Integer
  Dim l_Lower As Integer
  Dim lt_Header As CompressedRtfHeader
  Dim l_InitialLen As Long
  Dim lo_Dictionary As rc6.cArrayList
  Dim l_DictionaryIndex As Long
  Dim la_Dictionary() As Byte
  Dim l_Offset As Long
  Dim lo_UncompressedRtf As rc6.cArrayList
  Dim lt_Control As RtfControl
  Dim l_Flag As Boolean
  Dim l_CorrectedOffset As Long
  Dim ii As Long
  Dim jj As Long
  Dim kk As Long

  InitDictionary
  InitCrc
 
  ' Copy header values into lt_Header
  New_c.MemCopy ByVal VarPtr(lt_Header), ByVal VarPtr(pa_CompressedRtfBytes(0)), 16 ' .CompressedSize), ByVal VarPtr(pa_CompressedRtfBytes(0)), 4
     
  Select Case lt_Header.CompressionType
  Case rtf_Uncompressed
      ' Uncompressed, just return string
      DecompressRtfBytes = StrConv(pa_CompressedRtfBytes, vbUnicode)
 
  Case rtf_Compressed
      ' Compressed RTF - confirm CRC if required and then decompress
     
      If p_CheckCrc Then
        ' Check the data has been corrupt/tampered with by comparing the header CRC to the data CRC
        If CalculateCrc(pa_CompressedRtfBytes, mc_RtfHeaderLength) <> lt_Header.Crc Then
            Err.Raise vbObjectError, , "Stream is Corrupt."
        End If
      End If
     
      Set lo_UncompressedRtf = New_c.ArrayList(vbByte)
     
      l_InitialLen = mo_Dictionary.Count
      l_DictionaryIndex = l_InitialLen
     
      ' Stuff the "initial"/seed dictionary into our working dictionary
      mo_Dictionary.BindToArray la_Dictionary
     
      Set lo_Dictionary = New_c.ArrayList(vbByte, la_Dictionary)
      For ii = l_InitialLen To mc_CircularDictionaryMaxLength - 1
        lo_Dictionary.Add 0
      Next ii

      mo_Dictionary.ReleaseArrayBinding la_Dictionary
      Erase la_Dictionary
     
      ' Decompress the RTF data (if required).
      For ii = mc_RtfHeaderLength To UBound(pa_CompressedRtfBytes)
        lt_Control = GetRtfControl(pa_CompressedRtfBytes(ii))
        l_Offset = 1
       
        For jj = LBound(lt_Control.Flags) To UBound(lt_Control.Flags)
            l_Flag = lt_Control.Flags(jj)
           
            If l_Flag Then
              ' Uncompressed data
             
              lo_UncompressedRtf.Add pa_CompressedRtfBytes(ii + l_Offset)
              lo_Dictionary(l_DictionaryIndex) = pa_CompressedRtfBytes(ii + l_Offset)
             
              l_DictionaryIndex = l_DictionaryIndex + 1
             
              FixDictionaryIndex l_DictionaryIndex
           
            Else
              ' Compressed data
             
              '//reference bit, create word from two bytes
              l_Word = ShiftLeft(pa_CompressedRtfBytes(ii + l_Offset), 8) Or pa_CompressedRtfBytes(ii + (l_Offset + 1))

              '//get the offset into the dictionary
              l_Upper = ShiftRightZ(l_Word And &HFFF0&, 4)

              '//get the length of bytes to copy
              l_Lower = (l_Word And &HF) + 2

              If l_Upper = l_DictionaryIndex Then
                  '//special dictionary reference means that decompression is complete
                  Erase la_Dictionary
                  lo_UncompressedRtf.CopyToArray la_Dictionary
                  DecompressRtfBytes = StrConv(la_Dictionary, vbUnicode)
                 
                  Exit Function
                 
              End If

              '//cannot just copy the bytes over because the dictionary is a
              '//circular array so it must properly wrap to beginning
              For kk = 0 To l_Lower - 1
                  l_CorrectedOffset = l_Upper + kk
                  FixDictionaryIndex l_CorrectedOffset

                  If lo_UncompressedRtf.Count - 1 = lt_Header.UncompressedSize Then
                      '//this is the last token, the rest is just padding
                        Erase la_Dictionary
                        lo_UncompressedRtf.CopyToArray la_Dictionary
                        DecompressRtfBytes = StrConv(la_Dictionary, vbUnicode)
                 
                      Exit Function
                  End If

                  lo_UncompressedRtf.Add lo_Dictionary(l_CorrectedOffset)
                  lo_Dictionary(l_DictionaryIndex) = lo_Dictionary(l_CorrectedOffset)
                  l_DictionaryIndex = l_DictionaryIndex + 1

                  FixDictionaryIndex l_DictionaryIndex
              Next
             
              l_Offset = l_Offset + 1
            End If
           
            l_Offset = l_Offset + 1
        Next jj
       
        ii = ii + lt_Control.Length - 1
      Next ii
       
  Case Else
      Err.Raise 5, , "Unknown compression type: " & lt_Header.CompressionType
  End Select
End Function

Private Function GetRtfControl(ByVal p_Byte As Byte) As RtfControl
  Dim l_FlagsCount As Long
  Dim ii As Long
 
  With GetRtfControl
      For ii = LBound(.Flags) To UBound(.Flags)
        .Flags(ii) = ((p_Byte And ShiftLeft(&H1, ii)) = 0)
        If .Flags(ii) Then l_FlagsCount = l_FlagsCount + 1
      Next ii
      .Length = ((8 - l_FlagsCount) * 2) + l_FlagsCount + 1
  End With
End Function

Private Sub InitDictionary()
  Dim la_Dict() As Byte
 
  Set mo_Dictionary = Nothing
  If mo_Dictionary Is Nothing Then
      Set mo_Dictionary = New_c.ArrayList(vbByte)
      la_Dict = StrConv("{\rtf1\ansi\mac\deff0\deftab720{\fonttbl;}" & _
                        "{\f0\fnil \froman \fswiss \fmodern \fscript " & _
                        "\fdecor MS Sans SerifSymbolArialTimes New RomanCourier{\colortbl\red0\green0\blue0" & _
                        vbNewLine & _
                        "\par \pard\plain\f0\fs20\b\i\u\tab\tx", vbFromUnicode)
      mo_Dictionary.AddElements la_Dict
  End If
End Sub

Private Sub InitCrc()
  ' Found this code building CRC-32 table here:
  ' https://khoiriyyah.blogspot.com/2012/05/class-crc32-sebuah-file-vb6-code.html
 
  Const Limit = &HEDB88320
 
  Dim ii As Long
  Dim jj As Long
  Dim l_Crc As Long
 
  If mo_Crc Is Nothing Then
      Set mo_Crc = New_c.ArrayList(vbLong)
 
      For ii = 0 To 255
        l_Crc = ii
        For jj = 0 To 7
            If l_Crc And 1 Then
              l_Crc = (((l_Crc And &HFFFFFFFE) \ 2) And &H7FFFFFFF) Xor Limit
            Else
              l_Crc = ((l_Crc And &HFFFFFFFE) \ 2) And &H7FFFFFFF
            End If
        Next jj
     
        mo_Crc.Add l_Crc
      Next ii
  End If
End Sub

Private Sub FixDictionaryIndex(ByRef p_Index As Long)
  ' Make sure passed index is within our circular dictionary's range
  Do Until p_Index < mc_CircularDictionaryMaxLength
      p_Index = p_Index - mc_CircularDictionaryMaxLength
  Loop
End Sub

Private Function CalculateCrc(pa_Buffer() As Byte, Optional ByVal p_StartAtOffset As Long = 0) As Long
  ' Apparently CompressedRTF format uses a modified CRC-32 calculation.
  ' Described here: https://www.freeutils.net/source/jtnef/rtfcompressed
 
  Dim ii As Long
 
  For ii = p_StartAtOffset To UBound(pa_Buffer)
      CalculateCrc = (mo_Crc((CalculateCrc Xor pa_Buffer(ii)) And &HFF)) Xor (ShiftRightZ(CalculateCrc, 8))
  Next ii
End Function

Private Function ShiftLeft(ByVal Value As Long, ByVal ShiftCount As Long) As Long
  ' by Jost Schwider, jost@schwider.de, 20011001
  Select Case ShiftCount
  Case 0&
    ShiftLeft = Value
  Case 1&
    If Value And &H40000000 Then
      ShiftLeft = (Value And &H3FFFFFFF) * &H2& Or &H80000000
    Else
      ShiftLeft = (Value And &H3FFFFFFF) * &H2&
    End If
  Case 2&
    If Value And &H20000000 Then
      ShiftLeft = (Value And &H1FFFFFFF) * &H4& Or &H80000000
    Else
      ShiftLeft = (Value And &H1FFFFFFF) * &H4&
    End If
  Case 3&
    If Value And &H10000000 Then
      ShiftLeft = (Value And &HFFFFFFF) * &H8& Or &H80000000
    Else
      ShiftLeft = (Value And &HFFFFFFF) * &H8&
    End If
  Case 4&
    If Value And &H8000000 Then
      ShiftLeft = (Value And &H7FFFFFF) * &H10& Or &H80000000
    Else
      ShiftLeft = (Value And &H7FFFFFF) * &H10&
    End If
  Case 5&
    If Value And &H4000000 Then
      ShiftLeft = (Value And &H3FFFFFF) * &H20& Or &H80000000
    Else
      ShiftLeft = (Value And &H3FFFFFF) * &H20&
    End If
  Case 6&
    If Value And &H2000000 Then
      ShiftLeft = (Value And &H1FFFFFF) * &H40& Or &H80000000
    Else
      ShiftLeft = (Value And &H1FFFFFF) * &H40&
    End If
  Case 7&
    If Value And &H1000000 Then
      ShiftLeft = (Value And &HFFFFFF) * &H80& Or &H80000000
    Else
      ShiftLeft = (Value And &HFFFFFF) * &H80&
    End If
  Case 8&
    If Value And &H800000 Then
      ShiftLeft = (Value And &H7FFFFF) * &H100& Or &H80000000
    Else
      ShiftLeft = (Value And &H7FFFFF) * &H100&
    End If
  Case 9&
    If Value And &H400000 Then
      ShiftLeft = (Value And &H3FFFFF) * &H200& Or &H80000000
    Else
      ShiftLeft = (Value And &H3FFFFF) * &H200&
    End If
  Case 10&
    If Value And &H200000 Then
      ShiftLeft = (Value And &H1FFFFF) * &H400& Or &H80000000
    Else
      ShiftLeft = (Value And &H1FFFFF) * &H400&
    End If
  Case 11&
    If Value And &H100000 Then
      ShiftLeft = (Value And &HFFFFF) * &H800& Or &H80000000
    Else
      ShiftLeft = (Value And &HFFFFF) * &H800&
    End If
  Case 12&
    If Value And &H80000 Then
      ShiftLeft = (Value And &H7FFFF) * &H1000& Or &H80000000
    Else
      ShiftLeft = (Value And &H7FFFF) * &H1000&
    End If
  Case 13&
    If Value And &H40000 Then
      ShiftLeft = (Value And &H3FFFF) * &H2000& Or &H80000000
    Else
      ShiftLeft = (Value And &H3FFFF) * &H2000&
    End If
  Case 14&
    If Value And &H20000 Then
      ShiftLeft = (Value And &H1FFFF) * &H4000& Or &H80000000
    Else
      ShiftLeft = (Value And &H1FFFF) * &H4000&
    End If
  Case 15&
    If Value And &H10000 Then
      ShiftLeft = (Value And &HFFFF&) * &H8000& Or &H80000000
    Else
      ShiftLeft = (Value And &HFFFF&) * &H8000&
    End If
  Case 16&
    If Value And &H8000& Then
      ShiftLeft = (Value And &H7FFF&) * &H10000 Or &H80000000
    Else
      ShiftLeft = (Value And &H7FFF&) * &H10000
    End If
  Case 17&
    If Value And &H4000& Then
      ShiftLeft = (Value And &H3FFF&) * &H20000 Or &H80000000
    Else
      ShiftLeft = (Value And &H3FFF&) * &H20000
    End If
  Case 18&
    If Value And &H2000& Then
      ShiftLeft = (Value And &H1FFF&) * &H40000 Or &H80000000
    Else
      ShiftLeft = (Value And &H1FFF&) * &H40000
    End If
  Case 19&
    If Value And &H1000& Then
      ShiftLeft = (Value And &HFFF&) * &H80000 Or &H80000000
    Else
      ShiftLeft = (Value And &HFFF&) * &H80000
    End If
  Case 20&
    If Value And &H800& Then
      ShiftLeft = (Value And &H7FF&) * &H100000 Or &H80000000
    Else
      ShiftLeft = (Value And &H7FF&) * &H100000
    End If
  Case 21&
    If Value And &H400& Then
      ShiftLeft = (Value And &H3FF&) * &H200000 Or &H80000000
    Else
      ShiftLeft = (Value And &H3FF&) * &H200000
    End If
  Case 22&
    If Value And &H200& Then
      ShiftLeft = (Value And &H1FF&) * &H400000 Or &H80000000
    Else
      ShiftLeft = (Value And &H1FF&) * &H400000
    End If
  Case 23&
    If Value And &H100& Then
      ShiftLeft = (Value And &HFF&) * &H800000 Or &H80000000
    Else
      ShiftLeft = (Value And &HFF&) * &H800000
    End If
  Case 24&
    If Value And &H80& Then
      ShiftLeft = (Value And &H7F&) * &H1000000 Or &H80000000
    Else
      ShiftLeft = (Value And &H7F&) * &H1000000
    End If
  Case 25&
    If Value And &H40& Then
      ShiftLeft = (Value And &H3F&) * &H2000000 Or &H80000000
    Else
      ShiftLeft = (Value And &H3F&) * &H2000000
    End If
  Case 26&
    If Value And &H20& Then
      ShiftLeft = (Value And &H1F&) * &H4000000 Or &H80000000
    Else
      ShiftLeft = (Value And &H1F&) * &H4000000
    End If
  Case 27&
    If Value And &H10& Then
      ShiftLeft = (Value And &HF&) * &H8000000 Or &H80000000
    Else
      ShiftLeft = (Value And &HF&) * &H8000000
    End If
  Case 28&
    If Value And &H8& Then
      ShiftLeft = (Value And &H7&) * &H10000000 Or &H80000000
    Else
      ShiftLeft = (Value And &H7&) * &H10000000
    End If
  Case 29&
    If Value And &H4& Then
      ShiftLeft = (Value And &H3&) * &H20000000 Or &H80000000
    Else
      ShiftLeft = (Value And &H3&) * &H20000000
    End If
  Case 30&
    If Value And &H2& Then
      ShiftLeft = (Value And &H1&) * &H40000000 Or &H80000000
    Else
      ShiftLeft = (Value And &H1&) * &H40000000
    End If
  Case 31&
    If Value And &H1& Then
      ShiftLeft = &H80000000
    Else
      ShiftLeft = &H0&
    End If
  End Select
End Function

Private Function ShiftRightZ(ByVal Value As Long, ByVal ShiftCount As Long) As Long
  ' by Jost Schwider, jost@schwider.de, 20011001
  If Value And &H80000000 Then
    Select Case ShiftCount
    Case 0&:  ShiftRightZ = Value
    Case 1&:  ShiftRightZ = &H40000000 Or (Value And &H7FFFFFFF) \ &H2&
    Case 2&:  ShiftRightZ = &H20000000 Or (Value And &H7FFFFFFF) \ &H4&
    Case 3&:  ShiftRightZ = &H10000000 Or (Value And &H7FFFFFFF) \ &H8&
    Case 4&:  ShiftRightZ = &H8000000 Or (Value And &H7FFFFFFF) \ &H10&
    Case 5&:  ShiftRightZ = &H4000000 Or (Value And &H7FFFFFFF) \ &H20&
    Case 6&:  ShiftRightZ = &H2000000 Or (Value And &H7FFFFFFF) \ &H40&
    Case 7&:  ShiftRightZ = &H1000000 Or (Value And &H7FFFFFFF) \ &H80&
    Case 8&:  ShiftRightZ = &H800000 Or (Value And &H7FFFFFFF) \ &H100&
    Case 9&:  ShiftRightZ = &H400000 Or (Value And &H7FFFFFFF) \ &H200&
    Case 10&: ShiftRightZ = &H200000 Or (Value And &H7FFFFFFF) \ &H400&
    Case 11&: ShiftRightZ = &H100000 Or (Value And &H7FFFFFFF) \ &H800&
    Case 12&: ShiftRightZ = &H80000 Or (Value And &H7FFFFFFF) \ &H1000&
    Case 13&: ShiftRightZ = &H40000 Or (Value And &H7FFFFFFF) \ &H2000&
    Case 14&: ShiftRightZ = &H20000 Or (Value And &H7FFFFFFF) \ &H4000&
    Case 15&: ShiftRightZ = &H10000 Or (Value And &H7FFFFFFF) \ &H8000&
    Case 16&: ShiftRightZ = &H8000& Or (Value And &H7FFFFFFF) \ &H10000
    Case 17&: ShiftRightZ = &H4000& Or (Value And &H7FFFFFFF) \ &H20000
    Case 18&: ShiftRightZ = &H2000& Or (Value And &H7FFFFFFF) \ &H40000
    Case 19&: ShiftRightZ = &H1000& Or (Value And &H7FFFFFFF) \ &H80000
    Case 20&: ShiftRightZ = &H800& Or (Value And &H7FFFFFFF) \ &H100000
    Case 21&: ShiftRightZ = &H400& Or (Value And &H7FFFFFFF) \ &H200000
    Case 22&: ShiftRightZ = &H200& Or (Value And &H7FFFFFFF) \ &H400000
    Case 23&: ShiftRightZ = &H100& Or (Value And &H7FFFFFFF) \ &H800000
    Case 24&: ShiftRightZ = &H80& Or (Value And &H7FFFFFFF) \ &H1000000
    Case 25&: ShiftRightZ = &H40& Or (Value And &H7FFFFFFF) \ &H2000000
    Case 26&: ShiftRightZ = &H20& Or (Value And &H7FFFFFFF) \ &H4000000
    Case 27&: ShiftRightZ = &H10& Or (Value And &H7FFFFFFF) \ &H8000000
    Case 28&: ShiftRightZ = &H8& Or (Value And &H7FFFFFFF) \ &H10000000
    Case 29&: ShiftRightZ = &H4& Or (Value And &H7FFFFFFF) \ &H20000000
    Case 30&: ShiftRightZ = &H2& Or (Value And &H7FFFFFFF) \ &H40000000
    Case 31&: ShiftRightZ = &H1&
    End Select
  Else
    Select Case ShiftCount
    Case 0&:  ShiftRightZ = Value
    Case 1&:  ShiftRightZ = Value \ &H2&
    Case 2&:  ShiftRightZ = Value \ &H4&
    Case 3&:  ShiftRightZ = Value \ &H8&
    Case 4&:  ShiftRightZ = Value \ &H10&
    Case 5&:  ShiftRightZ = Value \ &H20&
    Case 6&:  ShiftRightZ = Value \ &H40&
    Case 7&:  ShiftRightZ = Value \ &H80&
    Case 8&:  ShiftRightZ = Value \ &H100&
    Case 9&:  ShiftRightZ = Value \ &H200&
    Case 10&: ShiftRightZ = Value \ &H400&
    Case 11&: ShiftRightZ = Value \ &H800&
    Case 12&: ShiftRightZ = Value \ &H1000&
    Case 13&: ShiftRightZ = Value \ &H2000&
    Case 14&: ShiftRightZ = Value \ &H4000&
    Case 15&: ShiftRightZ = Value \ &H8000&
    Case 16&: ShiftRightZ = Value \ &H10000
    Case 17&: ShiftRightZ = Value \ &H20000
    Case 18&: ShiftRightZ = Value \ &H40000
    Case 19&: ShiftRightZ = Value \ &H80000
    Case 20&: ShiftRightZ = Value \ &H100000
    Case 21&: ShiftRightZ = Value \ &H200000
    Case 22&: ShiftRightZ = Value \ &H400000
    Case 23&: ShiftRightZ = Value \ &H800000
    Case 24&: ShiftRightZ = Value \ &H1000000
    Case 25&: ShiftRightZ = Value \ &H2000000
    Case 26&: ShiftRightZ = Value \ &H4000000
    Case 27&: ShiftRightZ = Value \ &H8000000
    Case 28&: ShiftRightZ = Value \ &H10000000
    Case 29&: ShiftRightZ = Value \ &H20000000
    Case 30&: ShiftRightZ = Value \ &H40000000
    Case 31&: ShiftRightZ = &H0&
    End Select
  End If
End Function


Viewing all articles
Browse latest Browse all 1449

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>