Get document contents from two different directories ( my code only work with one direction !)

41 views Asked by At

I am trying to build a program to calculate similarity between documents. But first, I want to get document contents from two different directories , below is my code which is worked, but only getting document contents from one directory/folder. Where I want to compare documents of Folder 1 with document of Folder 2.

from os import listdir
from os.path import isfile, join

BASE_INPUT_DIR = "./folder1/"

def returnListOfFilePaths(folderPath):
    fileInfo = []
    listOfFileNames = [fileName for fileName in listdir(folderPath) if isfile(join(folderPath, fileName))]
    listOfFilePaths = [join(folderPath, fileName) for fileName in listdir(folderPath) if isfile(join(folderPath, fileName))]
    fileInfo.append(listOfFileNames)
    fileInfo.append(listOfFilePaths)
    return fileInfo

fileNames, filePaths = returnListOfFilePaths(folder1)
print(fileNames, "\n", filePaths)

def create_docContentDict(filePaths):
    rawContentDict = {}
    for filePath in filePaths:
        with open(filePath, "r") as ifile:
            fileContent = ifile.read()
        rawContentDict[filePath] = fileContent
    return rawContentDict
rawContentDict = create_docContentDict(filePaths)
print(rawContentDict)

I did some changes, but still getting docs from only one directory. here's my changes

BASE_INPUT_DIR = "./folder1/"
BASE_INPUT_DIR2 = "./folder2/"

def returnListOfFilePaths(folderPath,folderPath2):
    fileInfo = []
    fileInfo2 = []
    
    listOfFileNames = [fileName for fileName in listdir(folderPath) if isfile(join(folderPath, fileName))]
    listOfFilePaths = [join(folderPath, fileName) for fileName in listdir(folderPath) if isfile(join(folderPath, fileName))]
    
    listOfFileNames2 = [fileName for fileName in listdir(folderPath2) if isfile(join(folderPath2, fileName))]
    listOfFilePaths2 = [join(folderPath2, fileName) for fileName in listdir(folderPath2) if isfile(join(folderPath2, fileName)
                                                                                                  )]
    fileInfo.append(listOfFileNames)
    fileInfo.append(listOfFilePaths)
    fileInfo2.append(listOfFileNames2)
    fileInfo2.append(listOfFilePaths2)
    return fileInfo
    return fileinfo2


fileNames, filePaths = returnListOfFilePaths(BASE_INPUT_DIR,BASE_INPUT_DIR2  )
print(fileNames, "\n", filePaths)

def create_docContentDict(filePaths):
    rawContentDict = {}
    for filePath in filePaths:
        with open(filePath, "r") as ifile:
            fileContent = ifile.read()
        rawContentDict[filePath] = fileContent
    return rawContentDict
rawContentDict = create_docContentDict(filePaths)
print(rawContentDict)

Thanks

0

There are 0 answers