I am trying to build a program to calculate similarity between documents. But first, I want to get document contents from two different directories , below is my code which is worked, but only getting document contents from one directory/folder. Where I want to compare documents of Folder 1 with document of Folder 2.
from os import listdir
from os.path import isfile, join
BASE_INPUT_DIR = "./folder1/"
def returnListOfFilePaths(folderPath):
fileInfo = []
listOfFileNames = [fileName for fileName in listdir(folderPath) if isfile(join(folderPath, fileName))]
listOfFilePaths = [join(folderPath, fileName) for fileName in listdir(folderPath) if isfile(join(folderPath, fileName))]
fileInfo.append(listOfFileNames)
fileInfo.append(listOfFilePaths)
return fileInfo
fileNames, filePaths = returnListOfFilePaths(folder1)
print(fileNames, "\n", filePaths)
def create_docContentDict(filePaths):
rawContentDict = {}
for filePath in filePaths:
with open(filePath, "r") as ifile:
fileContent = ifile.read()
rawContentDict[filePath] = fileContent
return rawContentDict
rawContentDict = create_docContentDict(filePaths)
print(rawContentDict)
I did some changes, but still getting docs from only one directory. here's my changes
BASE_INPUT_DIR = "./folder1/"
BASE_INPUT_DIR2 = "./folder2/"
def returnListOfFilePaths(folderPath,folderPath2):
fileInfo = []
fileInfo2 = []
listOfFileNames = [fileName for fileName in listdir(folderPath) if isfile(join(folderPath, fileName))]
listOfFilePaths = [join(folderPath, fileName) for fileName in listdir(folderPath) if isfile(join(folderPath, fileName))]
listOfFileNames2 = [fileName for fileName in listdir(folderPath2) if isfile(join(folderPath2, fileName))]
listOfFilePaths2 = [join(folderPath2, fileName) for fileName in listdir(folderPath2) if isfile(join(folderPath2, fileName)
)]
fileInfo.append(listOfFileNames)
fileInfo.append(listOfFilePaths)
fileInfo2.append(listOfFileNames2)
fileInfo2.append(listOfFilePaths2)
return fileInfo
return fileinfo2
fileNames, filePaths = returnListOfFilePaths(BASE_INPUT_DIR,BASE_INPUT_DIR2 )
print(fileNames, "\n", filePaths)
def create_docContentDict(filePaths):
rawContentDict = {}
for filePath in filePaths:
with open(filePath, "r") as ifile:
fileContent = ifile.read()
rawContentDict[filePath] = fileContent
return rawContentDict
rawContentDict = create_docContentDict(filePaths)
print(rawContentDict)
Thanks