I'm trying to reformat some polygon coordinate data into a GeoJSON format usable by QuPath. I feel like I'm 90% of the way there, but I'm still struggling with holes in polygons, as well as with multipolygons in general. Does anybody have any experience converting polygon and multipolygon coordinates to a GeoJSON format, and if so, do you have a simpler way to do it than what I've been using?
Here's the general format of the data.
| object | image_id | polygon | tissuetype |
|---|---|---|---|
| 8 | USL-2022-41668_3 | POLYGON ((9464.875 5387, 9464.875 5315, 9372.25 5315, 9372.25 5387, 9464.875 5387)) | stroma |
| 20 | USL-2022-41668_3 | POLYGON ((12811.75 7772.375, 12749.125 7772.375, 12749.125 7849.0625, 12811.75 7849.0625, 12811.75 7772.375), (12779 7790.5, 12779 7789.5, 12779 7788.5, 12779 7787.5, 12779.5 7787, 12780 7786.5, 12780.5 7786, 12781 7785.5, 12781.5 7785, 12782.5 7785, 12783 7784.5, 12783.5 7784, 12784 7783.5, 12784.5 7783, 12785.5 7783, 12786.5 7783, 12787 7782.5, 12787 7781.5, 12787 7780.5, 12787 7779.5, 12787 7778.5, 12787.5 7778, 12788 7777.5, 12788.5 7777, 12789 7776.5, 12789.5 7776, 12790 7775.5, 12790.5 7775, 12791 7774.5, 12791.5 7774, 12792 7773.5, 12792.5 7773, 12793.5 7773, 12794.5 7773, 12795.5 7773, 12796.5 7773, 12797.5 7773, 12798.5 7773, 12799.5 7773, 12800.5 7773, 12801.5 7773, 12802.5 7773, 12803.5 7773, 12804 7773.5, 12804.5 7774, 12805 7774.5, 12805.5 7775, 12806 7775.5, 12806.5 7776, 12807 7776.5, 12807.5 7777, 12808 7777.5, 12808.5 7778, 12809 7778.5, 12809 7779.5, 12809 7780.5, 12809 7781.5, 12809 7782.5, 12809 7783.5, 12809 7784.5, 12809 7785.5, 12809 7786.5, 12809 7787.5, 12809 7788.5, 12809 7789.5, 12809 7790.5, 12809 7791.5, 12808.5 7792, 12808 7792.5, 12807.5 7793, 12807 7793.5, 12807 7794.5, 12807 7795.5, 12807 7796.5, 12806.5 7797, 12806 7797.5, 12805.5 7798, 12805 7798.5, 12804.5 7799, 12804 7799.5, 12803.5 7800, 12803 7800.5, 12802.5 7801, 12801.5 7801, 12800.5 7801, 12800 7801.5, 12799.5 7802, 12798.5 7802, 12798 7802.5, 12797.5 7803, 12796.5 7803, 12795.5 7803, 12794.5 7803, 12793.5 7803, 12792.5 7803, 12791.5 7803, 12790.5 7803, 12789.5 7803, 12788.5 7803, 12788 7802.5, 12787.5 7802, 12787 7801.5, 12786.5 7801, 12786 7800.5, 12785.5 7800, 12785 7799.5, 12784.5 7799, 12784 7798.5, 12783.5 7798, 12783 7797.5, 12783 7796.5, 12782.5 7796, 12782 7795.5, 12781.5 7795, 12781 7794.5, 12780.5 7794, 12780 7793.5, 12779.5 7793, 12779 7792.5, 12779 7791.5, 12779 7790.5)) | epithelium |
| 67 | USL-2022-41668_3 | MULTIPOLYGON (((12313.5 7359, 12313 7358.5, 12312.5 7358, 12312 7357.5, 12311.5 7357, 12311 7356.5, 12311 7355.5, 12311 7354.5, 12311 7353.5, 12311 7352.5, 12311.5 7352, 12312 7351.5, 12312.5 7351, 12313 7350.5, 12313.5 7350, 12314 7349.5, 12314.5 7349, 12315 7348.5, 12315.5 7348, 12316 7347.5, 12316.5 7347, 12317.5 7347, 12317.5 7340, 12317 7339.5, 12317 7339, 12308.5 7339, 12308 7339.5, 12307.5 7340, 12307 7340.5, 12306.5 7341, 12306 7341.5, 12305.5 7342, 12305 7342.5, 12305 7343.5, 12305 7344.5, 12305 7345.5, 12305 7346.5, 12305 7347.5, 12305 7348.5, 12305 7349.5, 12304.5 7350, 12304 7350.5, 12303.5 7351, 12303 7351.5, 12302.5 7352, 12302 7352.5, 12301.5 7353, 12301 7353.5, 12300.5 7354, 12300.02805280528 7354.471947194719, 12317.5 7377.412213740458, 12317.5 7362, 12317 7361.5, 12316.5 7361, 12315.5 7361, 12315 7360.5, 12314.5 7360, 12314 7359.5, 12313.5 7359)), ((12289.5 7340, 12290 7339.5, 12290 7339, 12288.244186046511 7339, 12289.219471947195 7340.2805280528055, 12289.5 7340))) | epithelium |
And here's the script so far.
import json
import csv
import pathlib
import os
# Directory holding the region-object CSVs; the GeoJSON files are written here.
obj_path = pathlib.Path("D:/ultivue/1604-2201 Core 8plex (All Runs) Region Object Data/run 2")

# Input file
in_file = "D:/ultivue/1604-2201 Core 8plex (All Runs) Region Object Data/run 2/USL-2022-41675_3_rc_outside_Exclusion_regions.csv"


def _split_groups(text):
    """Return the contents of every top-level parenthesised group in *text*.

    Nested parentheses are kept verbatim inside the returned strings, so the
    function can be applied repeatedly to peel one nesting level at a time.

    Raises:
        ValueError: if the parentheses in *text* are unbalanced.
    """
    groups = []
    depth = 0
    start = 0
    for index, char in enumerate(text):
        if char == "(":
            if depth == 0:
                start = index + 1
            depth += 1
        elif char == ")":
            depth -= 1
            if depth < 0:
                raise ValueError("Unbalanced parentheses in WKT: " + text)
            if depth == 0:
                groups.append(text[start:index])
    if depth != 0:
        raise ValueError("Unbalanced parentheses in WKT: " + text)
    return groups


def _parse_ring(ring_text):
    """Convert ``"x y, x y, ..."`` into a list of ``[x, y]`` float positions."""
    return [
        [float(value) for value in pair.split()]
        for pair in ring_text.split(",")
        if pair.strip()
    ]


def wkt_to_polygons(wkt):
    """Parse a WKT ``POLYGON`` or ``MULTIPOLYGON`` into GeoJSON polygon arrays.

    Args:
        wkt: WKT text such as ``"POLYGON ((0 0, 1 0, 1 1, 0 0))"``.

    Returns:
        A list of polygons. Each polygon is a list of linear rings (the
        exterior ring first, then any holes) and each ring is a list of
        ``[x, y]`` positions. The list can be used directly as (part of) the
        ``coordinates`` of a GeoJSON ``MultiPolygon``. Any other geometry
        type yields an empty list.

    Raises:
        ValueError: if the parentheses are unbalanced or a coordinate is not
            a number.
    """
    text = wkt.strip()
    if text.startswith("MULTIPOLYGON"):
        # One outer group wraps all member polygons; peel it off first.
        outer_groups = _split_groups(text[len("MULTIPOLYGON"):])
        polygon_texts = [
            polygon_text
            for outer in outer_groups
            for polygon_text in _split_groups(outer)
        ]
    elif text.startswith("POLYGON"):
        polygon_texts = _split_groups(text[len("POLYGON"):])
    else:
        return []
    # Every polygon keeps all of its rings together so holes stay attached
    # to their own exterior ring.
    return [
        [_parse_ring(ring_text) for ring_text in _split_groups(polygon_text)]
        for polygon_text in polygon_texts
    ]


def read_tissue_polygons(csv_path):
    """Read the region CSV and group its polygons by tissue type.

    The CSV must provide the columns ``image_id``, ``polygon`` and
    ``tissuetype``. Rows whose tissue type is ``"stroma"`` go to the stroma
    list; every other row goes to the epithelium list.

    Returns:
        ``(image_id, stroma, epithelium)`` where ``image_id`` is taken from
        the last row (``None`` when the file has no data rows) and the two
        lists are MultiPolygon-style coordinate arrays.
    """
    image_id = None
    coordinates_stroma = []
    coordinates_epithelium = []
    with open(csv_path, "r", newline="") as csvfile:
        for row in csv.DictReader(csvfile):
            image_id = row["image_id"]
            polygons = wkt_to_polygons(row["polygon"])
            if row["tissuetype"] == "stroma":
                coordinates_stroma.extend(polygons)
            else:
                coordinates_epithelium.extend(polygons)
    return image_id, coordinates_stroma, coordinates_epithelium


def build_feature_collection(image_id, suffix, class_name, color, coordinates):
    """Build a FeatureCollection holding one MultiPolygon annotation.

    Args:
        image_id: image identifier used as the prefix of the feature id.
        suffix: tissue label appended to the id (``image_id + "_" + suffix``).
        class_name: value stored under ``classification.name``.
        color: RGB list stored under ``classification.color``.
        coordinates: list of polygons as returned by ``wkt_to_polygons``.
    """
    return {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "id": image_id + "_" + suffix,
                "geometry": {
                    "type": "MultiPolygon",
                    "coordinates": coordinates,
                },
                "properties": {
                    "objectType": "annotation",
                    "classification": {
                        "name": class_name,
                        "color": color,
                    },
                },
            }
        ],
    }


def main():
    """Convert ``in_file`` into one stroma and one epithelium GeoJSON file."""
    # Setting working directory so the output lands next to the input data
    os.chdir(obj_path)

    image_id, coordinates_stroma, coordinates_epithelium = read_tissue_polygons(in_file)
    if image_id is None:
        # Without any row there is no image id to name the output files after.
        raise SystemExit("No rows found in " + in_file + "; nothing to write.")

    outputs = [
        ("stroma", "Stroma", [150, 200, 150], coordinates_stroma),
        ("epithelium", "Tumor", [255, 179, 102], coordinates_epithelium),
    ]
    for suffix, class_name, color, coordinates in outputs:
        collection = build_feature_collection(image_id, suffix, class_name, color, coordinates)
        filename = image_id + "_" + suffix + ".geojson"
        with open(filename, "w") as f:
            print("Writing to " + filename)
            f.write(json.dumps(collection, indent=2))
    print("Finished")


if __name__ == "__main__":
    main()
I managed to solve this, albeit with some code I found elsewhere