Some context
The goal is to keep a hash of the content of each file in a list.
Example
file1.txt "This is a long story about dragons and..." -> Hash A28F30...
file2.txt "Larousse dictionary. Letter A..." -> Hash 98BC012...
For this, the program listens for any modification to these files, in order to invalidate (and recalculate) the hash in case of creation/modification/deletion.
The file path is registered for an FSEvent and any modification to the file will then invalidate the hash.
This strategy works most of the time, but complications arise when (hard) links are used:
$ ln /Users/myself/test_file.txt /other_path/file.txt
$ echo "changed" >> /other_path/file.txt
This modifies the file at /Users/myself/test_file.txt,
but no event for this modification is received (breaking the hash <-> file equivalence).
The following is a short code example:
#include <iostream>
#include <unordered_map>
#include <vector>
#include <CoreServices/CoreServices.h>
#include <sys/stat.h>
// Brings the std names (string, cout, vector, ...) into scope for the whole file.
using namespace std;
// Key type of the cache: a string.
using FileKey = string;
// Value type of the cache: an empty placeholder struct for now.
using FileData = struct{};
// The application's cache: a hash map from FileKey to FileData.
using AppHashTable = std::unordered_map<FileKey, FileData>;
/// Placeholder cache invalidation: only reports the affected file on stdout.
///
/// @param appCache  Cache the entry would be dropped from; accepted but not touched yet.
/// @param file      Path of the file whose cached data is stale.
void invalidateCache( AppHashTable& appCache, const string& file)
{
    static_cast<void>(appCache); // silence the unused-parameter warning
    cout << "Invalidate ";
    cout << file;
    cout << endl;
}
/// FSEvents stream callback: logs every reported event (path and decoded flag
/// names) to stdout and invalidates the cache entry for the reported path.
///
/// @param streamRef   Stream that delivered the events (unused).
/// @param info        User pointer of the stream context; dereferenced as an AppHashTable.
/// @param numEvents   Number of entries in eventPaths / eventFlags / eventIds.
/// @param eventPaths  Interpreted as a `char**` array of C-string paths.
/// @param eventFlags  One flag bit-set per event.
/// @param eventIds    One event id per event (unused).
auto callback(
    ConstFSEventStreamRef streamRef,
    void *info,
    size_t numEvents,
    void *eventPaths,
    const FSEventStreamEventFlags *eventFlags,
    const FSEventStreamEventId * eventIds)-> void
{
    (void) streamRef; //No unused
    (void) eventIds; //No unused

    // Names of the kFSEventStreamEventFlag* bits, indexed by bit position
    // (entry 0 <-> 0x1, entry 1 <-> 0x2, ...). The order must follow the bit
    // layout of FSEventStreamEventFlags.
    static const char* const flagNames[] = {
        "MustScanSubDirs",
        "UserDropped",
        "KernelDropped",
        "EventIdsWrapped",
        "HistoryDone",
        "RootChanged",
        "Mount",
        "Unmount",
        "ItemCreated",
        "ItemRemoved",
        "ItemInodeMetaMod",
        "ItemRenamed",
        "ItemModified",
        "ItemFinderInfoMod",
        "ItemChangeOwner",
        "ItemXattrMod",
        "ItemIsFile",
        "ItemIsDir",
        "ItemIsSymlink",
        "OwnEvent",
        // Bits 20-22 were missing from the table, so hard-link and clone
        // events were reported without these flags being shown.
        "ItemIsHardlink",
        "ItemIsLastHardlink",
        "ItemCloned"
    };

    AppHashTable& appCache = *static_cast<AppHashTable*>(info);
    const char* const* pathsList = static_cast<char**>(eventPaths);

    cout << endl;
    for (size_t i = 0; i < numEvents; ++i)
    {
        cout << "Event " << i << " of " << numEvents << endl;
        cout << " " << pathsList[i] << endl;
        cout << " Flags: ";

        // Walk the name table and the flag bits in lock-step.
        FSEventStreamEventFlags bit = 1;
        for (const char* name : flagNames)
        {
            if ((eventFlags[i] & bit) != 0)
            {
                cout << name << " ";
            }
            bit <<= 1;
        }
        cout << endl;

        invalidateCache(appCache, pathsList[i]);
    }
}
/// Returns the hard-coded list of paths to watch: one folder and one file.
vector<string> getPaths()
{
    vector<string> watched;
    watched.reserve(2);
    watched.emplace_back("/Users/myself/test_folder");
    watched.emplace_back("/Users/myself/test_file.txt");
    return watched;
}
int main(int , const char * [])
{
AppHashTable appHashTable;
const auto vecPaths = getPaths();
// Create CF array of paths TODO to structure RAII
vector<CFStringRef> cfVecPaths;
for (const auto& p: vecPaths)
{
cfVecPaths.push_back( CFStringCreateWithCString( kCFAllocatorDefault, p.c_str(),
kCFStringEncodingUTF8 ));
}
CFArrayRef paths = CFArrayCreate(nullptr, reinterpret_cast<const void **>(cfVecPaths.data()),
static_cast<CFIndex>(cfVecPaths.size()), &kCFTypeArrayCallBacks );
FSEventStreamContext info{0, reinterpret_cast<void*>(&appHashTable), nullptr, nullptr, nullptr};
CFRunLoopRef loop = CFRunLoopGetCurrent() ;
FSEventStreamRef stream = FSEventStreamCreate(nullptr, callback, &info, paths,
kFSEventStreamEventIdSinceNow, 0.1, kFSEventStreamCreateFlagFileEvents );
FSEventStreamScheduleWithRunLoop(stream, loop, kCFRunLoopDefaultMode);
FSEventStreamStart(stream);
CFRunLoopRun() ;
FSEventStreamStop(stream);
FSEventStreamInvalidate(stream);
FSEventStreamRelease(stream);
// Release CF array of paths
if ( paths ) { CFRelease( paths ) ; }
for (auto& p: cfVecPaths)
{
CFRelease(p);
}
return 0;
}
Question:
How to detect modifications to the file through a link?
And why is the link creation/modification not detected?
Obviously, registering events for the full file-system is not an option.
Additional data
(Targeting macOS High-sierra to Big-Sur, using C++)
For convenience:
# Build script for the FSEvents proof of concept (single source file main.cpp).
# CMAKE_CXX_STANDARD 17 is only understood by CMake 3.8 and later.
cmake_minimum_required(VERSION 3.8)
# project() defines PROJECT_NAME itself; no need to set it by hand beforehand.
project(poc_events)
set(CMAKE_CXX_STANDARD 17)
# Fail at configure time instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set ( TESTCPP main.cpp )
add_executable( ${PROJECT_NAME} ${TESTCPP} )
# Link the framework on the target rather than through the global linker flags.
target_link_libraries(${PROJECT_NAME} PRIVATE "-framework CoreServices")
My findings so far (and I will stop researching this topic):
An example of how to monitor a file with Kernel Queues can be found here:
https://developer.apple.com/library/archive/documentation/Darwin/Conceptual/FSEvents_ProgGuide/KernelQueues/KernelQueues.html#A_Brief_Example