Quantcast
Viewing all articles
Browse latest Browse all 1449

Quick Find String in large files(Supper Instrb by vb6)

Using memory mapping and virtual pointers, there is no need to allocate memory and convert it into a string for further search.
If you open an ansi format file, store it in a unicode string variable, and then search for it, a 200MB file may overflow memory.
By using this method, it is estimated that the limit of long can be reached, with 2GB files.
Perhaps supporting 1000GB file search, requiring segmented processing and also not occupying memory

If there is no Wide character unicode string, it will be faster to use instr directly. If there are Wide character, it will be better to use instrb.
A 200m file that can be opened in Notepad, modified with a few words, searched, and saved as a separate file for up to 5 minutes

Code:

dim id as long
id=FindTextInFile("c:\1.txt","abcd")


Code:

Option Explicit
Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Public Declare Function GetACP Lib "kernel32" () As Long 'GetACP:获取当前系统的ANSI代码页


 
Private Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (Destination As Any, Source As Any, ByVal Length As Long)
Private Declare Function CreateFile Lib "kernel32" Alias "CreateFileA" (ByVal lpFileName As String, ByVal dwDesiredAccess As Long, ByVal dwShareMode As Long, ByVal lpSecurityAttributes As Long, ByVal dwCreationDisposition As Long, ByVal dwFlagsAndAttributes As Long, ByVal hTemplateFile As Long) As Long
Private Declare Function CloseHandle Lib "kernel32" (ByVal hObject As Long) As Long
Private Declare Function GetFileSize Lib "kernel32" (ByVal hFile As Long, lpFileSizeHigh As Long) As Long
Private Const GENERIC_READ = &H80000000
Private Const GENERIC_WRITE = &H40000000
Private Const OPEN_EXISTING = 3
Private Const FILE_SHARE_READ = &H1
Private Const FILE_SHARE_WRITE = &H2
Private Const FILE_ATTRIBUTE_NORMAL = &H80
Private Const FILE_ATTRIBUTE_ARCHIVE = &H20
Private Const FILE_ATTRIBUTE_READONLY = &H1
Private Const FILE_ATTRIBUTE_HIDDEN = &H2
Private Const FILE_ATTRIBUTE_SYSTEM = &H4
 
Private Declare Function CreateFileMapping Lib "kernel32" Alias "CreateFileMappingA" (ByVal hFile As Long, ByVal lpFileMappigAttributes As Long, ByVal flProtect As Long, ByVal dwMaximumSizeHigh As Long, ByVal dwMaximumSizeLow As Long, ByVal lpName As String) As Long
Private Declare Function MapViewOfFile Lib "kernel32" (ByVal hFileMappingObject As Long, ByVal dwDesiredAccess As Long, ByVal dwFileOffsetHigh As Long, ByVal dwFileOffsetLow As Long, ByVal dwNumberOfBytesToMap As Long) As Long
Private Declare Function UnmapViewOfFile Lib "kernel32" (lpBaseAddress As Any) As Long
Private Const PAGE_READWRITE = &H4
Private Const FILE_MAP_READ = &H4
  Public Const FILE_MAP_WRITE = &H2&

Private Declare Function VarPtrArray Lib "msvbvm60.dll" Alias "VarPtr" (Ptr() As Any) As Long
Private Type SAFEARRAYBOUND
    cElements As Long
    lLbound As Long
End Type
Private Type SAFEARRAY1D
    cDims As Integer
    fFeatures As Integer
    cbElements As Long
    clocks As Long
    pvData As Long
    rgsabound(0) As SAFEARRAYBOUND
End Type
 
'使用内存映射方式查找大型文件中包含的字符串
'Using memory mapping to find strings contained in large files
Function FindTextInFile(ByVal strFileName As String, ByVal strText As String, Optional Start As Long = 1) As Long
    Dim hFile As Long, hFileMap As Long
    Dim nFileSize As Long, lpszFileText As Long, pbFileText() As Byte
    Dim ppSA As Long, pSA As Long
    Dim tagNewSA As SAFEARRAY1D, tagOldSA As SAFEARRAY1D
 
    hFile = CreateFile(strFileName, _
            GENERIC_READ Or GENERIC_WRITE, _
            FILE_SHARE_READ Or FILE_SHARE_WRITE, _
            0, _
            OPEN_EXISTING, _
            FILE_ATTRIBUTE_NORMAL Or FILE_ATTRIBUTE_ARCHIVE Or FILE_ATTRIBUTE_READONLY Or _
            FILE_ATTRIBUTE_HIDDEN Or FILE_ATTRIBUTE_SYSTEM, _
            0) '打开文件
    If hFile <> 0 Then
        nFileSize = GetFileSize(hFile, ByVal 0&) '获得文件大小
        hFileMap = CreateFileMapping(hFile, 0, PAGE_READWRITE, 0, 0, vbNullString) '创建文件映射对象
        lpszFileText = MapViewOfFile(hFileMap, FILE_MAP_WRITE, 0, 0, 0) '将映射对象映射到进程内部的地址空间
         
        ReDim pbFileText(0) '初始化数组
        ppSA = VarPtrArray(pbFileText) '获得指向SAFEARRAY的指针的指针
        CopyMemory pSA, ByVal ppSA, 4 '获得指向SAFEARRAY的指针
        CopyMemory tagOldSA, ByVal pSA, Len(tagOldSA) '保存原来的SAFEARRAY成员信息
        CopyMemory tagNewSA, tagOldSA, Len(tagNewSA) '复制SAFEARRAY成员信息
        tagNewSA.rgsabound(0).cElements = nFileSize '修改数组元素个数
        tagNewSA.pvData = lpszFileText '修改数组数据地址
        CopyMemory ByVal pSA, tagNewSA, Len(tagNewSA) '将映射后的数据地址绑定至数组
         
        Dim ID As Long
        ID = InStrB(Start, pbFileText, StrConv(strText, vbFromUnicode))    '查找子字符串位置
        If ID > 0 Then
            ID = MultiByteToWideChar(GetACP, 0, lpszFileText, ID - 1, 0, 0) + 1 '取得转换后需要的空间大小retLen
            FindTextInFile = ID
        End If
         
        CopyMemory ByVal pSA, tagOldSA, Len(tagOldSA) '恢复数组的SAFEARRAY结构成员信息
        Erase pbFileText '删除数组
         
        UnmapViewOfFile lpszFileText '取消地址映射
        CloseHandle hFileMap '关闭文件映射对象的句柄
    End If
    CloseHandle hFile '关闭文件
End Function


Viewing all articles
Browse latest Browse all 1449

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>