Reading 2GB+ files in binary in VBA and File Hashes
Introduction#
There is a built in easy way to read files in binary within VBA, however it has a restriction of 2GB (2,147,483,647 bytes - max of Long data type). As technology evolves, this 2GB limit is easily breached. e.g. an ISO image of Operating System install DVD disc. Microsoft does provide a way to overcome this via low level Windows API and here is a backup of it.
Also demonstrate (Read part) for calculating File Hashes without external program like fciv.exe
from Microsoft.
Remarks#
METHODS FOR THE CLASS BY MICROSOFT
Method Name | Description |
---|---|
IsOpen | Returns a boolean to indicate whether the file is open. |
OpenFile(sFileName As String) | Opens the file specified by the sFileName argument. |
CloseFile | Closes the currently open file. |
ReadBytes(ByteCount As Long) | Reads ByteCount bytes and returns them in a Variant byte array and moves the pointer. |
WriteBytes(DataBytes() As Byte) | Writes the contents of the byte array to the current position in the file and moves the pointer. |
Flush | Forces Windows to flush the write cache. |
SeekAbsolute(HighPos As Long, LowPos As Long) | Moves the file pointer to the designated position from the beginning of the file. Though VBA treats the DWORDS as signed values, the API treats them as unsigned. Make the high-order argument non-zero to exceed 4GB. The low-order DWORD will be negative for values between 2GB and 4GB. |
SeekRelative(Offset As Long) | Moves the file pointer up to +/- 2GB from the current location. You can rewrite this method to allow for offsets greater than 2GB by converting a 64-bit signed offset into two 32-bit values. |
PROPERTIES OF THE CLASS BY MICROSOFT
Property | Description |
---|---|
FileHandle | The file handle for the currently open file. This is not compatible with VBA file handles. |
FileName | The name of the currently open file. |
AutoFlush | Sets/indicates whether WriteBytes will automatically call the Flush method. |
NORMAL MODULE
Function | Notes |
---|---|
GetFileHash(sFile As String, uBlockSize As Double, sHashType As String) | Simply throw in the full path to be hashed, Blocksize to use (number of bytes), and the type of Hash to use - one of the private constants: HashTypeMD5, HashTypeSHA1, HashTypeSHA256, HashTypeSHA384, HashTypeSHA512. This was designed to be as generic as possible. |
You should un/comment the uFileSize As Double accordingly. I have tested MD5 and SHA1.
This have to be in a Class module, examples later referred as “Random”
' How To Seek Past VBA's 2GB File Limit
' Source: https://support.microsoft.com/en-us/kb/189981 (Archived)
' This must be in a Class Module
Option Explicit
Public Enum W32F_Errors
W32F_UNKNOWN_ERROR = 45600
W32F_FILE_ALREADY_OPEN
W32F_PROBLEM_OPENING_FILE
W32F_FILE_ALREADY_CLOSED
W32F_Problem_seeking
End Enum
Private Const W32F_SOURCE = "Win32File Object"
Private Const GENERIC_WRITE = &H40000000
Private Const GENERIC_READ = &H80000000
Private Const FILE_ATTRIBUTE_NORMAL = &H80
Private Const CREATE_ALWAYS = 2
Private Const OPEN_ALWAYS = 4
Private Const INVALID_HANDLE_VALUE = -1
Private Const FILE_BEGIN = 0, FILE_CURRENT = 1, FILE_END = 2
Private Const FORMAT_MESSAGE_FROM_SYSTEM = &H1000
Private Declare Function FormatMessage Lib "kernel32" Alias "FormatMessageA" ( _
ByVal dwFlags As Long, _
lpSource As Long, _
ByVal dwMessageId As Long, _
ByVal dwLanguageId As Long, _
ByVal lpBuffer As String, _
ByVal nSize As Long, _
Arguments As Any) As Long
Private Declare Function ReadFile Lib "kernel32" ( _
ByVal hFile As Long, _
lpBuffer As Any, _
ByVal nNumberOfBytesToRead As Long, _
lpNumberOfBytesRead As Long, _
ByVal lpOverlapped As Long) As Long
Private Declare Function CloseHandle Lib "kernel32" (ByVal hObject As Long) As Long
Private Declare Function WriteFile Lib "kernel32" ( _
ByVal hFile As Long, _
lpBuffer As Any, _
ByVal nNumberOfBytesToWrite As Long, _
lpNumberOfBytesWritten As Long, _
ByVal lpOverlapped As Long) As Long
Private Declare Function CreateFile Lib "kernel32" Alias "CreateFileA" ( _
ByVal lpFileName As String, _
ByVal dwDesiredAccess As Long, _
ByVal dwShareMode As Long, _
ByVal lpSecurityAttributes As Long, _
ByVal dwCreationDisposition As Long, _
ByVal dwFlagsAndAttributes As Long, _
ByVal hTemplateFile As Long) As Long
Private Declare Function SetFilePointer Lib "kernel32" ( _
ByVal hFile As Long, _
ByVal lDistanceToMove As Long, _
lpDistanceToMoveHigh As Long, _
ByVal dwMoveMethod As Long) As Long
Private Declare Function FlushFileBuffers Lib "kernel32" (ByVal hFile As Long) As Long
Private hFile As Long, sFName As String, fAutoFlush As Boolean
Public Property Get FileHandle() As Long
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
FileHandle = hFile
End Property
Public Property Get FileName() As String
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
FileName = sFName
End Property
Public Property Get IsOpen() As Boolean
IsOpen = hFile <> INVALID_HANDLE_VALUE
End Property
Public Property Get AutoFlush() As Boolean
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
AutoFlush = fAutoFlush
End Property
Public Property Let AutoFlush(ByVal NewVal As Boolean)
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
fAutoFlush = NewVal
End Property
Public Sub OpenFile(ByVal sFileName As String)
If hFile <> INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_OPEN, sFName
End If
hFile = CreateFile(sFileName, GENERIC_WRITE Or GENERIC_READ, 0, 0, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0)
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_PROBLEM_OPENING_FILE, sFileName
End If
sFName = sFileName
End Sub
Public Sub CloseFile()
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
CloseHandle hFile
sFName = ""
fAutoFlush = False
hFile = INVALID_HANDLE_VALUE
End Sub
Public Function ReadBytes(ByVal ByteCount As Long) As Variant
Dim BytesRead As Long, Bytes() As Byte
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
ReDim Bytes(0 To ByteCount - 1) As Byte
ReadFile hFile, Bytes(0), ByteCount, BytesRead, 0
ReadBytes = Bytes
End Function
Public Sub WriteBytes(DataBytes() As Byte)
Dim fSuccess As Long, BytesToWrite As Long, BytesWritten As Long
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
BytesToWrite = UBound(DataBytes) - LBound(DataBytes) + 1
fSuccess = WriteFile(hFile, DataBytes(LBound(DataBytes)), BytesToWrite, BytesWritten, 0)
If fAutoFlush Then Flush
End Sub
Public Sub Flush()
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
FlushFileBuffers hFile
End Sub
Public Sub SeekAbsolute(ByVal HighPos As Long, ByVal LowPos As Long)
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
LowPos = SetFilePointer(hFile, LowPos, HighPos, FILE_BEGIN)
End Sub
Public Sub SeekRelative(ByVal Offset As Long)
Dim TempLow As Long, TempErr As Long
If hFile = INVALID_HANDLE_VALUE Then
RaiseError W32F_FILE_ALREADY_CLOSED
End If
TempLow = SetFilePointer(hFile, Offset, ByVal 0&, FILE_CURRENT)
If TempLow = -1 Then
TempErr = Err.LastDllError
If TempErr Then
RaiseError W32F_Problem_seeking, "Error " & TempErr & "." & vbCrLf & CStr(TempErr)
End If
End If
End Sub
Private Sub Class_Initialize()
hFile = INVALID_HANDLE_VALUE
End Sub
Private Sub Class_Terminate()
If hFile <> INVALID_HANDLE_VALUE Then CloseHandle hFile
End Sub
Private Sub RaiseError(ByVal ErrorCode As W32F_Errors, Optional sExtra)
Dim Win32Err As Long, Win32Text As String
Win32Err = Err.LastDllError
If Win32Err Then
Win32Text = vbCrLf & "Error " & Win32Err & vbCrLf & _
DecodeAPIErrors(Win32Err)
End If
Select Case ErrorCode
Case W32F_FILE_ALREADY_OPEN
Err.Raise W32F_FILE_ALREADY_OPEN, W32F_SOURCE, "The file '" & sExtra & "' is already open." & Win32Text
Case W32F_PROBLEM_OPENING_FILE
Err.Raise W32F_PROBLEM_OPENING_FILE, W32F_SOURCE, "Error opening '" & sExtra & "'." & Win32Text
Case W32F_FILE_ALREADY_CLOSED
Err.Raise W32F_FILE_ALREADY_CLOSED, W32F_SOURCE, "There is no open file."
Case W32F_Problem_seeking
Err.Raise W32F_Problem_seeking, W32F_SOURCE, "Seek Error." & vbCrLf & sExtra
Case Else
Err.Raise W32F_UNKNOWN_ERROR, W32F_SOURCE, "Unknown error." & Win32Text
End Select
End Sub
Private Function DecodeAPIErrors(ByVal ErrorCode As Long) As String
Dim sMessage As String, MessageLength As Long
sMessage = Space$(256)
MessageLength = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0&, ErrorCode, 0&, sMessage, 256&, 0&)
If MessageLength > 0 Then
DecodeAPIErrors = Left(sMessage, MessageLength)
Else
DecodeAPIErrors = "Unknown Error."
End If
End Function
Code for Calculating File Hash in a Standard module
Private Const HashTypeMD5 As String = "MD5" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.md5cryptoserviceprovider(v=vs.110).aspx
Private Const HashTypeSHA1 As String = "SHA1" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.sha1cryptoserviceprovider(v=vs.110).aspx
Private Const HashTypeSHA256 As String = "SHA256" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.sha256cryptoserviceprovider(v=vs.110).aspx
Private Const HashTypeSHA384 As String = "SHA384" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.sha384cryptoserviceprovider(v=vs.110).aspx
Private Const HashTypeSHA512 As String = "SHA512" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.sha512cryptoserviceprovider(v=vs.110).aspx
Private uFileSize As Double ' Comment out if not testing performance by FileHashes()
Sub FileHashes()
Dim tStart As Date, tFinish As Date, sHash As String, aTestFiles As Variant, oTestFile As Variant, aBlockSizes As Variant, oBlockSize As Variant
Dim BLOCKSIZE As Double
' This performs performance testing on different file sizes and block sizes
aBlockSizes = Array("2^12-1", "2^13-1", "2^14-1", "2^15-1", "2^16-1", "2^17-1", "2^18-1", "2^19-1", "2^20-1", "2^21-1", "2^22-1", "2^23-1", "2^24-1", "2^25-1", "2^26-1")
aTestFiles = Array("C:\ISO\clonezilla-live-2.2.2-37-amd64.iso", "C:\ISO\HPIP201.2014_0902.29.iso", "C:\ISO\SW_DVD5_Windows_Vista_Business_W32_32BIT_English.ISO", "C:\ISO\Win10_1607_English_x64.iso", "C:\ISO\SW_DVD9_Windows_Svr_Std_and_DataCtr_2012_R2_64Bit_English.ISO")
Debug.Print "Test files: " & Join(aTestFiles, " | ")
Debug.Print "BlockSizes: " & Join(aBlockSizes, " | ")
For Each oTestFile In aTestFiles
Debug.Print oTestFile
For Each oBlockSize In aBlockSizes
BLOCKSIZE = Evaluate(oBlockSize)
tStart = Now
sHash = GetFileHash(CStr(oTestFile), BLOCKSIZE, HashTypeMD5)
tFinish = Now
Debug.Print sHash, uFileSize, Format(tFinish - tStart, "hh:mm:ss"), oBlockSize & " (" & BLOCKSIZE & ")"
Next
Next
End Sub
Private Function GetFileHash(ByVal sFile As String, ByVal uBlockSize As Double, ByVal sHashType As String) As String
Dim oFSO As Object ' "Scripting.FileSystemObject"
Dim oCSP As Object ' One of the "CryptoServiceProvider"
Dim oRnd As Random ' "Random" Class by Microsoft, must be in the same file
Dim uBytesRead As Double, uBytesToRead As Double, bDone As Boolean
Dim aBlock() As Byte, aBytes As Variant ' Arrays to store bytes
Dim aHash() As Byte, sHash As String, i As Long
'Dim uFileSize As Double ' Un-Comment if GetFileHash() is to be used individually
Set oRnd = New Random ' Class by Microsoft: Random
Set oFSO = CreateObject("Scripting.FileSystemObject")
Set oCSP = CreateObject("System.Security.Cryptography." & sHashType & "CryptoServiceProvider")
If oFSO Is Nothing Or oRnd Is Nothing Or oCSP Is Nothing Then
MsgBox "One or more required objects cannot be created"
GoTo CleanUp
End If
uFileSize = oFSO.GetFile(sFile).Size ' FILELEN() has 2GB max!
uBytesRead = 0
bDone = False
sHash = String(oCSP.HashSize / 4, "0") ' Each hexadecimal has 4 bits
Application.ScreenUpdating = False
' Process the file in chunks of uBlockSize or less
If uFileSize = 0 Then
ReDim aBlock(0)
oCSP.TransformFinalBlock aBlock, 0, 0
bDone = True
Else
With oRnd
.OpenFile sFile
Do
If uBytesRead + uBlockSize < uFileSize Then
uBytesToRead = uBlockSize
Else
uBytesToRead = uFileSize - uBytesRead
bDone = True
End If
' Read in some bytes
aBytes = .ReadBytes(uBytesToRead)
aBlock = aBytes
If bDone Then
oCSP.TransformFinalBlock aBlock, 0, uBytesToRead
uBytesRead = uBytesRead + uBytesToRead
Else
uBytesRead = uBytesRead + oCSP.TransformBlock(aBlock, 0, uBytesToRead, aBlock, 0)
End If
DoEvents
Loop Until bDone
.CloseFile
End With
End If
If bDone Then
' convert Hash byte array to an hexadecimal string
aHash = oCSP.hash
For i = 0 To UBound(aHash)
Mid$(sHash, i * 2 + (aHash(i) > 15) + 2) = Hex(aHash(i))
Next
End If
Application.ScreenUpdating = True
' Clean up
oCSP.Clear
CleanUp:
Set oFSO = Nothing
Set oRnd = Nothing
Set oCSP = Nothing
GetFileHash = sHash
End Function
The output is pretty interesting, my test files indicates that
BLOCKSIZE = 131071
(2^17-1) gives overall best performance with 32bit Office 2010 on Windows 7 x64, next best is 2^16-1 (65535). Note2^27-1
yields Out of memory.
File Size (bytes) | File Name |
---|---|
146,800,640 | clonezilla-live-2.2.2-37-amd64.iso |
798,210,048 | HPIP201.2014_0902.29.iso |
2,073,016,320 | SW_DVD5_Windows_Vista_Business_W32_32BIT_English.ISO |
4,380,387,328 | Win10_1607_English_x64.iso |
5,400,115,200 | SW_DVD9_Windows_Svr_Std_and_DataCtr_2012_R2_64Bit_English.ISO |
Calculating all Files Hash from a root Folder
Another variation from the code above gives you more performance when you want to get hash codes of all files from a root folder including all sub folders.
Example of Worksheet:
Code
Option Explicit
Private Const HashTypeMD5 As String = "MD5" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.md5cryptoserviceprovider(v=vs.110).aspx
Private Const HashTypeSHA1 As String = "SHA1" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.sha1cryptoserviceprovider(v=vs.110).aspx
Private Const HashTypeSHA256 As String = "SHA256" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.sha256cryptoserviceprovider(v=vs.110).aspx
Private Const HashTypeSHA384 As String = "SHA384" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.sha384cryptoserviceprovider(v=vs.110).aspx
Private Const HashTypeSHA512 As String = "SHA512" ' https://msdn.microsoft.com/en-us/library/system.security.cryptography.sha512cryptoserviceprovider(v=vs.110).aspx
Private Const BLOCKSIZE As Double = 131071 ' 2^17-1
Private oFSO As Object
Private oCSP As Object
Private oRnd As Random ' Requires the Class from Microsoft https://support.microsoft.com/en-us/kb/189981
Private sHashType As String
Private sRootFDR As String
Private oRng As Range
Private uFileCount As Double
Sub AllFileHashes() ' Active-X button calls this
Dim oWS As Worksheet
' | A: FileHash | B: FileSize | C: FileName | D: FilaName and Path | E: File Last Modification Time | F: Time required to calculate has code (seconds)
With ThisWorkbook
' Clear All old entries on all worksheets
For Each oWS In .Worksheets
Set oRng = Intersect(oWS.UsedRange, oWS.UsedRange.Offset(2))
If Not oRng Is Nothing Then oRng.ClearContents
Next
With .Worksheets(1)
sHashType = Trim(.Range("A1").Value) ' Range(A1)
sRootFDR = Trim(.Range("C1").Value) ' Range(C1) Column B for file size
If Len(sHashType) = 0 Or Len(sRootFDR) = 0 Then Exit Sub
Set oRng = .Range("A3") ' First entry on First Page
End With
End With
uFileCount = 0
If oRnd Is Nothing Then Set oRnd = New Random ' Class by Microsoft: Random
If oFSO Is Nothing Then Set oFSO = CreateObject("Scripting.FileSystemObject") ' Just to get correct FileSize
If oCSP Is Nothing Then Set oCSP = CreateObject("System.Security.Cryptography." & sHashType & "CryptoServiceProvider")
ProcessFolder oFSO.GetFolder(sRootFDR)
Application.StatusBar = False
Application.ScreenUpdating = True
oCSP.Clear
Set oCSP = Nothing
Set oRng = Nothing
Set oFSO = Nothing
Set oRnd = Nothing
Debug.Print "Total file count: " & uFileCount
End Sub
Private Sub ProcessFolder(ByRef oFDR As Object)
Dim oFile As Object, oSubFDR As Object, sHash As String, dStart As Date, dFinish As Date
Application.ScreenUpdating = False
For Each oFile In oFDR.Files
uFileCount = uFileCount + 1
Application.StatusBar = uFileCount & ": " & Right(oFile.Path, 255 - Len(uFileCount) - 2)
oCSP.Initialize ' Reinitialize the CryptoServiceProvider
dStart = Now
sHash = GetFileHash(oFile, BLOCKSIZE, sHashType)
dFinish = Now
With oRng
.Value = sHash
.Offset(0, 1).Value = oFile.Size ' File Size in bytes
.Offset(0, 2).Value = oFile.Name ' File name with extension
.Offset(0, 3).Value = oFile.Path ' Full File name and Path
.Offset(0, 4).Value = FileDateTime(oFile.Path) ' Last modification timestamp of file
.Offset(0, 5).Value = dFinish - dStart ' Time required to calculate hash code
End With
If oRng.Row = Rows.Count Then
' Max rows reached, start on Next sheet
If oRng.Worksheet.Index + 1 > ThisWorkbook.Worksheets.Count Then
MsgBox "All rows in all worksheets have been used, please create more sheets"
End
End If
Set oRng = ThisWorkbook.Sheets(oRng.Worksheet.Index + 1).Range("A3")
oRng.Worksheet.Activate
Else
' Move to next row otherwise
Set oRng = oRng.Offset(1)
End If
Next
'Application.StatusBar = False
Application.ScreenUpdating = True
oRng.Activate
For Each oSubFDR In oFDR.SubFolders
ProcessFolder oSubFDR
Next
End Sub
Private Function GetFileHash(ByVal sFile As String, ByVal uBlockSize As Double, ByVal sHashType As String) As String
Dim uBytesRead As Double, uBytesToRead As Double, bDone As Boolean
Dim aBlock() As Byte, aBytes As Variant ' Arrays to store bytes
Dim aHash() As Byte, sHash As String, i As Long, oTmp As Variant
Dim uFileSize As Double ' Un-Comment if GetFileHash() is to be used individually
If oRnd Is Nothing Then Set oRnd = New Random ' Class by Microsoft: Random
If oFSO Is Nothing Then Set oFSO = CreateObject("Scripting.FileSystemObject") ' Just to get correct FileSize
If oCSP Is Nothing Then Set oCSP = CreateObject("System.Security.Cryptography." & sHashType & "CryptoServiceProvider")
If oFSO Is Nothing Or oRnd Is Nothing Or oCSP Is Nothing Then
MsgBox "One or more required objects cannot be created"
Exit Function
End If
uFileSize = oFSO.GetFile(sFile).Size ' FILELEN() has 2GB max
uBytesRead = 0
bDone = False
sHash = String(oCSP.HashSize / 4, "0") ' Each hexadecimal is 4 bits
' Process the file in chunks of uBlockSize or less
If uFileSize = 0 Then
ReDim aBlock(0)
oCSP.TransformFinalBlock aBlock, 0, 0
bDone = True
Else
With oRnd
On Error GoTo CannotOpenFile
.OpenFile sFile
Do
If uBytesRead + uBlockSize < uFileSize Then
uBytesToRead = uBlockSize
Else
uBytesToRead = uFileSize - uBytesRead
bDone = True
End If
' Read in some bytes
aBytes = .ReadBytes(uBytesToRead)
aBlock = aBytes
If bDone Then
oCSP.TransformFinalBlock aBlock, 0, uBytesToRead
uBytesRead = uBytesRead + uBytesToRead
Else
uBytesRead = uBytesRead + oCSP.TransformBlock(aBlock, 0, uBytesToRead, aBlock, 0)
End If
DoEvents
Loop Until bDone
.CloseFile
CannotOpenFile:
If Err.Number <> 0 Then ' Change the hash code to the Error description
oTmp = Split(Err.Description, vbCrLf)
sHash = oTmp(1) & ":" & oTmp(2)
End If
End With
End If
If bDone Then
' convert Hash byte array to an hexadecimal string
aHash = oCSP.hash
For i = 0 To UBound(aHash)
Mid$(sHash, i * 2 + (aHash(i) > 15) + 2) = Hex(aHash(i))
Next
End If
GetFileHash = sHash
End Function