157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
@app.task(bind=True)
def import_new_meshes(self):
    """Look for new vessel meshes on disk and insert them into the database.

    Scans ``settings.MESH_METADATA_DIR`` for the newest
    ``upload_metadata_*.yaml.gz`` file, then for each vessel-mesh record in
    it loads the gzipped mesh JSON from ``settings.MESH_DIR``, verifies the
    file's md5 against the metadata record, and creates a ``Mesh`` row
    (keyed on md5) if one does not already exist.

    Returns:
        list[dict] | None: one ``{"id", "md5", "name"}`` entry per mesh that
        passed the md5 check (whether newly created or pre-existing), or
        ``None`` when no metadata file was found.

    Raises:
        ValueError: if ``settings.MESH_METADATA_DIR`` is not configured.
    """

    def _parse_utc(value, fmt):
        # All timestamps in the metadata/mesh config are naive strings;
        # interpret them as UTC so the DB stores aware datetimes.
        return datetime.datetime.strptime(value, fmt).replace(
            tzinfo=datetime.timezone.utc
        )

    if settings.MESH_METADATA_DIR is None:
        raise ValueError("MESH_METADATA_DIR has not been set.")

    # Find the latest metadata file (newest by ctime).
    files = os.listdir(settings.MESH_METADATA_DIR)
    file_list = [
        os.path.join(settings.MESH_METADATA_DIR, file)
        for file in files
        if file.startswith("upload_metadata_") and file.endswith(".yaml.gz")
    ]
    if not file_list:
        msg = "Upload metadata file not found."
        logger.error(msg)
        return

    # NOTE: file_list entries are already full paths, so do not join the
    # directory onto latest_metadata_file again when logging or opening it.
    latest_metadata_file = max(file_list, key=os.path.getctime)
    logger.info(f"Loading metadata file from {latest_metadata_file}")
    with gzip.open(latest_metadata_file, "rb") as f:
        # SECURITY NOTE(review): yaml.Loader can construct arbitrary Python
        # objects. If these metadata files never use custom tags, switch to
        # yaml.safe_load — confirm with the producer of the files first.
        metadata = yaml.load(f.read(), Loader=yaml.Loader)

    meshes_added = []
    for record in metadata["records"]:
        # We only want the vessel json files.
        if not re.search(VESSEL_MESH_FILENAME_PATTERN, record["filepath"]):
            continue

        # Extract the filename from the filepath and load the gzipped JSON.
        mesh_filename = record["filepath"].split("/")[-1]
        zipped_filename = mesh_filename + ".gz"
        try:
            with gzip.open(
                Path(settings.MESH_DIR, zipped_filename), "rb"
            ) as gzipped_mesh:
                mesh_json = json.load(gzipped_mesh)
        except FileNotFoundError:
            logger.warning(f"{zipped_filename} not found. Skipping.")
            continue
        except PermissionError:
            logger.warning(
                f"Can't read {zipped_filename} due to permission error. File may still be transferring. Skipping."
            )
            continue

        # Write the unzipped mesh out to a temp file so calculate_md5 can
        # hash it; the context manager guarantees the file is closed and
        # removed even if hashing raises (the original leaked the handle).
        with tempfile.NamedTemporaryFile(mode="w+", delete=True) as tfile:
            json.dump(mesh_json, tfile, indent=4)
            tfile.flush()
            md5 = calculate_md5(tfile.name)

        # Cross-reference the md5 from the metadata record against the file
        # on disk. A mismatch may indicate a filename clash — skip it.
        if md5 != record["md5"]:
            logger.warning(
                f"Mesh file md5: {md5}\ndoes not match\n"
                f"Metadata md5: {record['md5']}\nSkipping."
            )
            continue

        # Create a database entry keyed on md5; existing meshes are reused.
        mesh, created = Mesh.objects.get_or_create(
            md5=md5,
            defaults={
                "name": mesh_filename,
                "valid_date_start": _parse_utc(
                    mesh_json["config"]["mesh_info"]["region"]["start_time"],
                    "%Y-%m-%d",
                ),
                "valid_date_end": _parse_utc(
                    mesh_json["config"]["mesh_info"]["region"]["end_time"],
                    "%Y-%m-%d",
                ),
                "created": _parse_utc(record["created"], "%Y%m%dT%H%M%S"),
                "json": mesh_json,
                "meshiphi_version": record["meshiphi"],
                "lat_min": record["latlong"]["latmin"],
                "lat_max": record["latlong"]["latmax"],
                "lon_min": record["latlong"]["lonmin"],
                "lon_max": record["latlong"]["lonmax"],
            },
        )
        if created:
            logger.info(
                f"Adding new mesh to database: {mesh.id} {mesh.name} {mesh.created}"
            )
        # Report every mesh that passed the md5 check, new or pre-existing.
        meshes_added.append(
            {"id": mesh.id, "md5": record["md5"], "name": mesh.name}
        )
    return meshes_added