This changes the data passed back by the foreach function and the data that is passed to the parser. It creates a structure containing everything needed to parse the dive. Pointers were removed so that the data can be stored and parsed later.
Fixed a typo in COCHRAN_TIMESTAMP_OFFSET. Fixed a bug that occured in calculating NDL and Deco time if an event occurred between samples. Added start temperature to Commander parse. Made corrupt dive recovery cleaner. Cleaned up many compiler warnings (-Wall) Cleaned up some whitespace Re-wrapped some lines. Removed extraneous parenthesis not needed with & operator. Seperated device configuration from device handle (more or less) Cleaned up sample buffer wrap-around logic and used better var naming. Fixed typo in CMD_START_OFFSET --- src/cochran_cmdr_parser.c | 66 +++++----- src/cochran_commander.c | 288 ++++++++++++++++++++++++----------------- src/cochran_commander.h | 59 +++++---- src/cochran_commander_parser.c | 57 +++++--- src/cochran_commander_parser.h | 2 + src/cochran_emc_parser.c | 182 ++++++++++++-------------- 6 files changed, 353 insertions(+), 301 deletions(-)
diff --git a/src/cochran_cmdr_parser.c b/src/cochran_cmdr_parser.c index de25912..b2b9748 100644 --- a/src/cochran_cmdr_parser.c +++ b/src/cochran_cmdr_parser.c @@ -35,15 +35,12 @@ #include "cochran_commander_parser.h"
-#define SZ_SAMPLE 2 - - dc_status_t cochran_cmdr_parser_get_field (dc_parser_t *abstract, dc_field_type_t type, unsigned int flags, void *value) { - cochran_data_t *data = (cochran_data_t *) abstract->data; - unsigned char *log = data->current_log; + const unsigned char *data = abstract->data; + const unsigned char *log = data + COCHRAN_MODEL_SIZE;
dc_gasmix_t *gasmix = (dc_gasmix_t *) value; dc_salinity_t *water = (dc_salinity_t *) value; @@ -53,31 +50,35 @@ cochran_cmdr_parser_get_field (dc_parser_t *abstract, dc_field_type_t type, case DC_FIELD_TEMPERATURE_SURFACE: *((unsigned int*) value) = ((float) log[CMD_START_TEMP] - 32) / 1.8; case DC_FIELD_TEMPERATURE_MINIMUM: - *((unsigned int*) value) = ((float) log[CMD_MIN_TEMP] / 2 + 20 - 32) / 1.8; + *((unsigned int*) value) = ((float) log[CMD_MIN_TEMP] / 2 + + 20 - 32) / 1.8; case DC_FIELD_TEMPERATURE_MAXIMUM: - *((unsigned int*) value) = ((float) log[CMD_MAX_TEMP] / 2 + 20 - 32) / 1.8; + *((unsigned int*) value) = ((float) log[CMD_MAX_TEMP] / 2 + + 20 - 32) / 1.8; case DC_FIELD_DIVETIME: *((unsigned int *) value) = array_uint16_le (log + EMC_BT) * 60; break; case DC_FIELD_MAXDEPTH: - *((double *) value) = (float) array_uint16_le (log + CMD_MAX_DEPTH) / 4 * FEET; + *((double *) value) = (float) array_uint16_le (log + CMD_MAX_DEPTH) + / 4 * FEET; break; case DC_FIELD_AVGDEPTH: - *((double *) value) = (float) array_uint16_le (log + CMD_AVG_DEPTH) / 4 * FEET; + *((double *) value) = (float) array_uint16_le (log + CMD_AVG_DEPTH) + / 4 * FEET; break; case DC_FIELD_GASMIX_COUNT: *((unsigned int *) value) = 2; break; case DC_FIELD_GASMIX: - gasmix->oxygen = (double) array_uint16_le ((char *)log + gasmix->oxygen = (double) array_uint16_le (log + CMD_O2_PERCENT + 2 * flags) / 256 / 100; gasmix->helium = 0; gasmix->nitrogen = 1.0 - gasmix->oxygen - gasmix->helium; break; case DC_FIELD_SALINITY: // 0 = low conductivity, 1 = high, maybe there's a 2? - water->type = ( (log[CMD_WATER_CONDUCTIVITY] & 0x3) == 0 ? DC_WATER_FRESH - : DC_WATER_SALT ); + water->type = ( (log[CMD_WATER_CONDUCTIVITY] & 0x3) == 0 + ? DC_WATER_FRESH : DC_WATER_SALT ); water->density = 1000 + 12.5 * (log[CMD_WATER_CONDUCTIVITY] & 0x3); break; case DC_FIELD_ATMOSPHERIC: @@ -97,11 +98,12 @@ dc_status_t cochran_cmdr_parser_samples_foreach (dc_parser_t *abstract, dc_sample_callback_t callback, void *userdata) { - cochran_data_t *data = (cochran_data_t *)abstract->data; - unsigned char *sdata = data->current_sample; - unsigned char *log = data->current_log; - unsigned int size = data->current_sample_size; - unsigned char *s; + const unsigned char *log = abstract->data + COCHRAN_MODEL_SIZE; + const unsigned char *samples = log + COCHRAN_CMDR_LOG_SIZE; + + unsigned int size = abstract->size - COCHRAN_MODEL_SIZE + - COCHRAN_CMDR_LOG_SIZE; + const unsigned char *s;
dc_sample_value_t sample = {0}, empty_sample = {0}; unsigned int time = 0, last_sample_time; @@ -109,8 +111,10 @@ cochran_cmdr_parser_samples_foreach (dc_parser_t *abstract, double temperature; double depth; double ascent_rate; - unsigned char deco_obligation = 0; - unsigned int deco_time = 0; + unsigned char corrupt_dive = 0; + + if (array_uint32_le(log + COCHRAN_CMDR_LOG_SIZE / 2) == 0xFFFFFFFF) + corrupt_dive = 1;
// Cochran samples depth every second and varies between ascent rate // and temp ever other second. @@ -124,8 +128,8 @@ cochran_cmdr_parser_samples_foreach (dc_parser_t *abstract, sample.depth = depth * FEET; if (callback) callback (DC_SAMPLE_DEPTH, sample, userdata);
- // TODO: sample.temperature = (log + CMD_START_TEMP - 32) / 1.8; - // TODO: if (callback) callback (DC_SAMPLE_TEMPERATURE, sample, userdata); + sample.temperature = (float) *(log + CMD_START_TEMP) - 32 / 1.8; + if (callback) callback (DC_SAMPLE_TEMPERATURE, sample, userdata);
while (offset < size) { sample = empty_sample; @@ -137,13 +141,10 @@ cochran_cmdr_parser_samples_foreach (dc_parser_t *abstract, callback (DC_SAMPLE_TIME, sample, userdata); }
- if (offset > data->sample_memory_end_address) - s = sdata + offset - data->sample_memory_end_address - 1; - else - s = sdata + offset; + s = samples + offset;
// If corrupt_dive end before offset - if (data->corrupt_dive) { + if (corrupt_dive) { if ( s[0] == 0x10 || s[0] == 0xFF || s[0] == 0xA8) { @@ -155,17 +156,12 @@ cochran_cmdr_parser_samples_foreach (dc_parser_t *abstract,
// Check for event if (s[0] & 0x80) { - offset += cochran_commander_handle_event(abstract, callback, userdata, - s[0], offset, time); - if (s[0] == 0xC5) - deco_obligation = 1; // Deco obligation begins - else if (s[0] == 0xC8) - deco_obligation = 0; // Deco obligation ends - + offset += cochran_commander_handle_event(abstract, callback, + userdata, s[0], offset, time); continue; }
- + // Depth is logged as change in feet, bit 0x40 means negative depth depth += (float) (s[0] & 0x3F) / 4 * (s[0] & 0x40 ? -1 : 1); sample.depth = depth * FEET; @@ -186,7 +182,7 @@ cochran_cmdr_parser_samples_foreach (dc_parser_t *abstract, }
time ++; - offset += SZ_SAMPLE; + offset += COCHRAN_CMDR_SAMPLE_SIZE; }
return DC_STATUS_SUCCESS; diff --git a/src/cochran_commander.c b/src/cochran_commander.c index e8884c1..fdcac67 100644 --- a/src/cochran_commander.c +++ b/src/cochran_commander.c @@ -42,13 +42,17 @@ static const dc_device_vtable_t cochran_commander_device_vtable = { DC_FAMILY_COCHRAN_COMMANDER, cochran_commander_device_set_fingerprint, /* set_fingerprint */ cochran_commander_device_read, /* read */ - NULL, /* write */ + NULL, /* write */ cochran_commander_device_dump, /* dump */ cochran_commander_device_foreach, /* foreach */ cochran_commander_device_close /* close */ };
+static dc_status_t +cochran_read_id(dc_device_t *device); + + dc_status_t cochran_packet (cochran_device_t *device, const unsigned char command[], unsigned int csize, unsigned char answer[], unsigned int asize, @@ -76,9 +80,9 @@ cochran_packet (cochran_device_t *device, const unsigned char command[],
// Weird but I only get the right result when I do it twice // Rates are odd, like 825600 for the EMC, 115200 for commander - serial_configure(device->port, device->data.high_baud, 8, + serial_configure(device->port, device->data.conf.high_baud, 8, SERIAL_PARITY_NONE, 2, SERIAL_FLOWCONTROL_NONE); - serial_configure(device->port, device->data.high_baud, 8, + serial_configure(device->port, device->data.conf.high_baud, 8, SERIAL_PARITY_NONE, 2, SERIAL_FLOWCONTROL_NONE); }
@@ -202,7 +206,8 @@ cochran_commander_device_open (dc_device_t **out, dc_context_t *context, device->progress = NULL; device->data.logbook = NULL; device->data.sample = NULL; - cochran_commander_device_set_fingerprint((dc_device_t *) device, "", 0); + cochran_commander_device_set_fingerprint((dc_device_t *) device, + NULL, 0);
rc = cochran_commander_serial_open(device, context); if (rc != DC_STATUS_SUCCESS) @@ -219,8 +224,13 @@ cochran_commander_device_open (dc_device_t **out, dc_context_t *context, }
// Check ID - if ((device->data.model & 0xFF0000) == COCHRAN_MODEL_UNKNOWN) { - ERROR (context, "Device not recognized."); + if ((device->data.conf.model & 0xFF0000) == COCHRAN_MODEL_UNKNOWN) { + ERROR (context, + "Unknown Cochran model %02x %02x %02x %02x %02x %02x %02x %02x", + *(device->data.id + 0x3B), *(device->data.id + 0x3B + 1), + *(device->data.id + 0x3B + 2), *(device->data.id + 0x3B + 3), + *(device->data.id + 0x3B + 4), *(device->data.id + 0x3B + 5), + *(device->data.id + 0x3B + 6), *(device->data.id + 0x3B + 7)); serial_close (device->port); free (device); return DC_STATUS_UNSUPPORTED; @@ -257,15 +267,15 @@ cochran_commander_device_set_fingerprint (dc_device_t *abstract, const unsigned char data[], unsigned int size) { cochran_device_t *device = (cochran_device_t *) abstract; - cochran_data_t *d = &(device->data); + cochran_data_t *d = &device->data;
if (size && size != sizeof (d->fingerprint)) return DC_STATUS_INVALIDARGS;
if (size) - memcpy (&(d->fingerprint), data, sizeof (d->fingerprint)); + memcpy (&d->fingerprint, data, sizeof (d->fingerprint)); else - memset (&(d->fingerprint), 0xFF, sizeof (d->fingerprint)); + memset (&d->fingerprint, 0xFF, sizeof (d->fingerprint));
return DC_STATUS_SUCCESS; } @@ -281,7 +291,7 @@ cochran_commander_device_read (dc_device_t *abstract, unsigned int address, unsigned char command[10]; unsigned char command_size;
- switch (device->data.address_length) + switch (device->data.conf.address_length) { case COCHRAN_ADDRESS_LENGTH_32: // EMC uses 32 bit addressing @@ -322,71 +332,65 @@ cochran_commander_device_read (dc_device_t *abstract, unsigned int address, }
-static void -cochran_set_device_config (cochran_device_t *device) +void +cochran_commander_get_config (const unsigned char *model, + cochran_config_t *conf) { - dc_device_t *abstract = (dc_device_t *) device; // Determine model - if (memcmp(device->data.id + 0x3B, "AM2315\xA3\x71", 8) == 0) + if (memcmp(model, "AM2315\xA3\x71", 8) == 0) { - device->data.model = COCHRAN_MODEL_EMC_20; - device->data.log_size = 512; - device->data.sample_memory_start_address = 0x94000; - device->data.dive_num_ptr = 0x56; - device->data.dive_count_ptr = 0xD2; - device->data.dive_count_endian = COCHRAN_LE_TYPE; - device->data.sample_end_ptr = 256; - device->data.log_pre_dive_ptr = 30; - device->data.log_end_dive_ptr = 256; - device->data.last_interdive_ptr = 233; - device->data.last_entry_ptr = 194; - device->data.date_format = COCHRAN_DATE_FORMAT_SMHDMY; - device->data.address_length = COCHRAN_ADDRESS_LENGTH_32; - device->data.high_baud = 825600; - } - else if (memcmp(device->data.id + 0x3B, "AMA315\xC3\xC5", 8) == 0) + conf->model = COCHRAN_MODEL_EMC_20; + conf->log_size = 512; + conf->sample_memory_start_address = 0x94000; + conf->dive_num_ptr = 0x56; + conf->dive_count_ptr = 0xD2; + conf->dive_count_endian = COCHRAN_LE_TYPE; + conf->sample_end_ptr = 256; + conf->log_pre_dive_ptr = 30; + conf->log_end_dive_ptr = 256; + conf->last_interdive_ptr = 233; + conf->last_entry_ptr = 194; + conf->date_format = COCHRAN_DATE_FORMAT_SMHDMY; + conf->address_length = COCHRAN_ADDRESS_LENGTH_32; + conf->high_baud = 825600; + } + else if (memcmp(model, "AMA315\xC3\xC5", 8) == 0) { - device->data.model = COCHRAN_MODEL_EMC_16; - device->data.log_size = 512; - device->data.sample_memory_start_address = 0x94000; - device->data.dive_num_ptr = 0x56; - device->data.dive_count_ptr = 0xD2; - device->data.dive_count_endian = COCHRAN_LE_TYPE; - device->data.sample_end_ptr = 256; - device->data.log_pre_dive_ptr = 30; - device->data.log_end_dive_ptr = 256; - device->data.last_interdive_ptr = 233; - device->data.last_entry_ptr = 194; - device->data.date_format = COCHRAN_DATE_FORMAT_SMHDMY; - device->data.address_length = COCHRAN_ADDRESS_LENGTH_32; - device->data.high_baud = 825600; - } - else if (memcmp(device->data.id + 0x3B, "AM\x11""2212\x02", 8) == 0) + conf->model = COCHRAN_MODEL_EMC_16; + conf->log_size = 512; + conf->sample_memory_start_address = 0x94000; + conf->dive_num_ptr = 0x56; + conf->dive_count_ptr = 0xD2; + conf->dive_count_endian = COCHRAN_LE_TYPE; + conf->sample_end_ptr = 256; + conf->log_pre_dive_ptr = 30; + conf->log_end_dive_ptr = 256; + conf->last_interdive_ptr = 233; + conf->last_entry_ptr = 194; + conf->date_format = COCHRAN_DATE_FORMAT_SMHDMY; + conf->address_length = COCHRAN_ADDRESS_LENGTH_32; + conf->high_baud = 825600; + } + else if (memcmp(model, "AM\x11""2212\x02", 8) == 0) { - device->data.model = COCHRAN_MODEL_COMMANDER_AIR_NITROX; - device->data.log_size = 256; - device->data.sample_memory_start_address = 0x20000; - device->data.dive_num_ptr = 0x46; - device->data.dive_count_ptr = 0x46; - device->data.dive_count_endian = COCHRAN_BE_TYPE; - device->data.sample_end_ptr = 256; - device->data.log_pre_dive_ptr = 30; - device->data.log_end_dive_ptr = 128; - device->data.last_interdive_ptr = 167; - device->data.last_entry_ptr = -1; - device->data.date_format = COCHRAN_DATE_FORMAT_MSDHYM; - device->data.address_length = COCHRAN_ADDRESS_LENGTH_24; - device->data.high_baud = 115200; + conf->model = COCHRAN_MODEL_COMMANDER_AIR_NITROX; + conf->log_size = 256; + conf->sample_memory_start_address = 0x20000; + conf->dive_num_ptr = 0x46; + conf->dive_count_ptr = 0x46; + conf->dive_count_endian = COCHRAN_BE_TYPE; + conf->sample_end_ptr = 256; + conf->log_pre_dive_ptr = 30; + conf->log_end_dive_ptr = 128; + conf->last_interdive_ptr = 167; + conf->last_entry_ptr = -1; + conf->date_format = COCHRAN_DATE_FORMAT_MSDHYM; + conf->address_length = COCHRAN_ADDRESS_LENGTH_24; + conf->high_baud = 115200; } else { - device->data.model = 0; - ERROR (abstract->context, - "Unknown Cochran model %02x %02x %02x %02x %02x %02x %02x %02x", - *(device->data.id + 0x3B), *(device->data.id + 0x3B + 1), - *(device->data.id + 0x3B + 2), *(device->data.id + 0x3B + 3), - *(device->data.id + 0x3B + 4), *(device->data.id + 0x3B + 5), - *(device->data.id + 0x3B + 6), *(device->data.id + 0x3B + 7)); + conf->model = 0; }
return; @@ -404,7 +408,7 @@ cochran_read_id (dc_device_t *abstract) if (rc != DC_STATUS_SUCCESS) return rc;
- if (strncmp(device->data.id, "(C)", 3) != 0) { + if (strncmp((const char *)device->data.id, "(C)", 3) != 0) { // It's a Commander, read again memcpy(device->data.id0, device->data.id, 67);
@@ -416,7 +420,8 @@ cochran_read_id (dc_device_t *abstract) return rc; }
- cochran_set_device_config(device); + cochran_commander_get_config(device->data.id + 0x3B, &device->data.conf); +
return DC_STATUS_SUCCESS; } @@ -427,10 +432,11 @@ cochran_read_config (dc_device_t *abstract) { cochran_device_t *device = (cochran_device_t *) abstract; cochran_data_t *data = &device->data; + dc_status_t rc; unsigned char command[2] = { 0x96, 0x00 };
- if ((data->model & 0xFF0000) == COCHRAN_MODEL_EMC_FAMILY) + if ((data->conf.model & 0xFF0000) == COCHRAN_MODEL_EMC_FAMILY) data->config_count = 2; else data->config_count = 4; @@ -454,7 +460,7 @@ cochran_read_misc (dc_device_t *abstract)
unsigned char command[7] = { 0x89, 0x05, 0x00, 0x00, 0x00, 0xDC, 0x05 };
- switch (device->data.model & 0xFF0000) + switch (device->data.conf.model & 0xFF0000) { case COCHRAN_MODEL_COMMANDER_FAMILY: command[2] = 0xCA; @@ -486,7 +492,7 @@ static dc_status_t cochran_read_logbook (dc_device_t *abstract) { cochran_device_t *device = (cochran_device_t *) abstract; - cochran_data_t *d = &(device->data); + cochran_data_t *d = &device->data; dc_status_t rc;
if (d->logbook) @@ -516,7 +522,8 @@ cochran_read_logbook (dc_device_t *abstract) cochran_commander_serial_setup(device, abstract->context);
// Request log book - rc = cochran_commander_device_read(abstract, 0, d->logbook, d->logbook_size); + rc = cochran_commander_device_read(abstract, 0, d->logbook, + d->logbook_size);
device->progress->current = d->logbook_size; device_event_emit (abstract, DC_EVENT_PROGRESS, device->progress); @@ -530,14 +537,15 @@ static void cochran_find_fingerprint(dc_device_t *abstract) { cochran_device_t *device = (cochran_device_t *) abstract; - cochran_data_t *d = (cochran_data_t *) &(device->data); + cochran_data_t *d = (cochran_data_t *) &device->data; + cochran_config_t *conf = &d->conf;
// Skip to fingerprint to reduce time d->fp_dive_num = d->dive_count - 1;
- while (d->fp_dive_num >= 0 && memcmp(&(d->fingerprint), - d->logbook + d->fp_dive_num * d->log_size - + d->dive_num_ptr, + while (d->fp_dive_num >= 0 && memcmp(&d->fingerprint, + d->logbook + d->fp_dive_num * conf->log_size + + conf->dive_num_ptr, sizeof(d->fingerprint))) d->fp_dive_num--; } @@ -547,8 +555,9 @@ static void cochran_get_sample_parms(dc_device_t *abstract) { cochran_device_t *device = (cochran_device_t *) abstract; - cochran_data_t *d = (cochran_data_t *) &(device->data); - unsigned int pre_dive_offset, end_dive_offset; + cochran_data_t *d = (cochran_data_t *) &device->data; + cochran_config_t *conf = &d->conf; + unsigned int pre_dive_offset = 0, end_dive_offset = 0; unsigned int low_offset, high_offset;
// Find lowest and highest offsets into sample data @@ -557,10 +566,10 @@ cochran_get_sample_parms(dc_device_t *abstract)
int i; for (i = d->fp_dive_num + 1; i < d->dive_count; i++) { - pre_dive_offset = array_uint32_le (&(d->logbook[i * d->log_size - + d->log_pre_dive_ptr])); - end_dive_offset = array_uint32_le (&(d->logbook[i * d->log_size - + d->log_end_dive_ptr])); + pre_dive_offset = array_uint32_le (&(d->logbook[i * conf->log_size + + conf->log_pre_dive_ptr])); + end_dive_offset = array_uint32_le (&(d->logbook[i * conf->log_size + + conf->log_end_dive_ptr]));
// Check for ring buffer wrap-around. if (pre_dive_offset > end_dive_offset) @@ -577,8 +586,8 @@ cochran_get_sample_parms(dc_device_t *abstract) // I'll round to 128K, dives longer than 12 hrs aren't likely // and memory in sizes not rounded to 128K might be odd. high_offset = ((pre_dive_offset - 1) & 0xE0000) + 0x20000; - d->sample_memory_end_address = high_offset; - low_offset = d->sample_memory_start_address; + conf->sample_memory_end_address = high_offset; + low_offset = conf->sample_memory_start_address; d->sample_data_offset = low_offset; d->sample_size = high_offset - low_offset; } else if (low_offset < 0xFFFFFFFF && high_offset > 0) { @@ -597,7 +606,7 @@ static dc_status_t cochran_read_samples(dc_device_t *abstract) { cochran_device_t *device = (cochran_device_t *) abstract; - cochran_data_t *d = (cochran_data_t *) &(device->data); + cochran_data_t *d = (cochran_data_t *) &device->data; dc_status_t rc;
@@ -649,7 +658,8 @@ static dc_status_t cochran_commander_device_read_all (dc_device_t *abstract) { cochran_device_t *device = (cochran_device_t *) abstract; - cochran_data_t *d = (cochran_data_t *) &(device->data); + cochran_data_t *d = (cochran_data_t *) &device->data; + cochran_config_t *conf = &d->conf;
dc_status_t rc;
@@ -663,12 +673,12 @@ cochran_commander_device_read_all (dc_device_t *abstract) return rc;
// Determine size of dive list to read. Round up to nearest 16K - if (d->dive_count_endian == COCHRAN_LE_TYPE) - d->dive_count = array_uint16_le (d->config[0] + d->dive_count_ptr); + if (conf->dive_count_endian == COCHRAN_LE_TYPE) + d->dive_count = array_uint16_le (d->config[0] + conf->dive_count_ptr); else - d->dive_count = array_uint16_be (d->config[0] + d->dive_count_ptr); + d->dive_count = array_uint16_be (d->config[0] + conf->dive_count_ptr); - d->logbook_size = ((d->dive_count * d->log_size) & 0xFFFFC000) + d->logbook_size = ((d->dive_count * conf->log_size) & 0xFFFFC000) + 0x4000;
rc = cochran_read_logbook(abstract); @@ -696,11 +706,11 @@ dc_status_t cochran_commander_device_dump (dc_device_t *abstract, dc_buffer_t *data) { cochran_device_t *device = (cochran_device_t *) abstract; - cochran_data_t *d = (cochran_data_t *) &(device->data); + cochran_data_t *d = (cochran_data_t *) &device->data; int ptr = 0;
dc_status_t rc; - char *b; + unsigned char *b; int size;
rc = cochran_commander_device_read_all (abstract); @@ -816,16 +826,42 @@ cochran_commander_device_dump (dc_device_t *abstract, dc_buffer_t *data) }
+/* +* For corrupt dives the end-of-samples pointer is 0xFFFFFFFF +* search for a reasonable size, e.g. using next dive start sample +* or end-of-samples to limit searching for recoverable samples +*/ + +static unsigned int +cochran_guess_sample_end_address(cochran_data_t *data, unsigned int log_num) +{ + cochran_config_t *conf = &data->conf; + const unsigned char *log = data->logbook + data->conf.log_size * log_num; + + if (log_num == data->dive_count) + // Return next usable address from config0 page + return array_uint32_le(data->config[0] + + conf->sample_memory_end_address); + + // Next log's start address + return array_uint32_le(log + conf->log_size + 6); +} +
dc_status_t cochran_commander_device_foreach (dc_device_t *abstract, dc_dive_callback_t callback, void *userdata) { cochran_device_t *device = (cochran_device_t *) abstract; - cochran_data_t *d = &(device->data); - unsigned int sample_start_offset, sample_end_offset; + cochran_data_t *data = &device->data; + cochran_config_t *conf = &data->conf; + + unsigned int sample_start_address, sample_end_address; dc_status_t rc;
+ unsigned char *log, *fingerprint, *sample, *dive; + int sample_size, dive_size; + rc = cochran_commander_device_read_all (abstract);
if (rc != DC_STATUS_SUCCESS) @@ -833,36 +869,54 @@ cochran_commander_device_foreach (dc_device_t *abstract,
// Loop through each dive int i; - for (i = d->dive_count - 1; i > d->fp_dive_num; i--) { + for (i = data->dive_count - 1; i > data->fp_dive_num; i--) { + log = data->logbook + i * conf->log_size;
- d->current_log = d->logbook + i * d->log_size; + sample_start_address = array_uint32_le (log + 6); + sample_end_address = array_uint32_le (log + conf->log_size / 2);
- sample_start_offset = array_uint32_le (d->current_log + 6); - sample_end_offset = array_uint32_le (d->current_log - + d->log_size/2); + if (sample_end_address == 0xFFFFFFFF) + // Corrupt dive, guess the end address + sample_end_address = cochran_guess_sample_end_address(data, i);
- d->current_sample = d->sample + sample_start_offset - - d->sample_data_offset; + sample = data->sample + sample_start_address - data->sample_data_offset;
- // Check for corrupt post-dive section - if (array_uint32_le(d->current_log + d->log_size/2) == 0xFFFFFFFF) - d->corrupt_dive = 1; - else - d->corrupt_dive = 0; + // Determine size of sample + sample_size = sample_end_address - sample_start_address; + if (sample_size < 0) + // Adjust for ring buffer wrap-around + sample_size += conf->sample_memory_end_address + - conf->sample_memory_start_address;
- // Check for ring buffer wrap - if (sample_start_offset > sample_end_offset) - d->current_sample_size = d->sample_memory_end_address - - sample_start_offset + 1 + sample_end_offset - - d->sample_memory_start_address + 1; - else - d->current_sample_size = sample_end_offset - sample_start_offset; + fingerprint = log + conf->dive_num_ptr; + + // Build dive blob + dive_size = COCHRAN_MODEL_SIZE + conf->log_size + sample_size; + dive = malloc(dive_size); + if (dive == NULL) + return DC_STATUS_NOMEMORY;
- d->current_fingerprint = d->current_log + d->dive_num_ptr; + memcpy(dive, data->id + 0x3B, 8); // model string + memcpy(dive + COCHRAN_MODEL_SIZE, log, conf->log_size); + if (sample_start_address <= sample_end_address) { + memcpy(dive + COCHRAN_MODEL_SIZE + conf->log_size, sample, + sample_size); + } else { + // It wrapped the buffer, copy two sections + unsigned int size = conf->sample_memory_end_address + - sample_start_address; + memcpy(dive + COCHRAN_MODEL_SIZE + conf->log_size, sample, size); + memcpy(dive + COCHRAN_MODEL_SIZE + conf->log_size + size, + data->sample, sample_size - size); + }
- if (callback && !callback ((unsigned char *) d, sizeof(device->data), - d->current_fingerprint, sizeof (d->fingerprint), userdata)) + if (callback && !callback ((unsigned char *) dive, dive_size, + fingerprint, COCHRAN_FINGERPRINT_SIZE, userdata)) { + free(dive); return DC_STATUS_SUCCESS; + } + + free(dive); }
return DC_STATUS_SUCCESS; diff --git a/src/cochran_commander.h b/src/cochran_commander.h index e259977..8fa6228 100644 --- a/src/cochran_commander.h +++ b/src/cochran_commander.h @@ -21,7 +21,16 @@
// seconds to add to Cochran time stamps to get unix time // Equivalent to Jan 1, 1992 00:00:00 -#define COCHRAN_TIMESTAMP_OFFSET 694224000 +#define COCHRAN_TIMESTAMP_OFFSET 694242000 + +#define COCHRAN_MODEL_SIZE 8 +#define COCHRAN_FINGERPRINT_SIZE 2 + +#define COCHRAN_EMC_LOG_SIZE 512 +#define COCHRAN_EMC_SAMPLE_SIZE 3 + +#define COCHRAN_CMDR_LOG_SIZE 256 +#define COCHRAN_CMDR_SAMPLE_SIZE 2
#define COCHRAN_LE_TYPE 0 #define COCHRAN_BE_TYPE 1 @@ -42,9 +51,26 @@ typedef enum cochran_model_t { COCHRAN_MODEL_COMMANDER_AIR_NITROX, } cochran_model_t;
-typedef struct cochran_data_t { +// Device configuration items +typedef struct cochran_config_t { cochran_model_t model; + int log_size; + int sample_memory_start_address; + int sample_memory_end_address; + int dive_num_ptr; + int dive_count_ptr; + int dive_count_endian; + int sample_end_ptr; + int log_pre_dive_ptr; + int log_end_dive_ptr; + int last_interdive_ptr; + int last_entry_ptr; + int date_format; + int address_length; + int high_baud; // baud rate to switch to for log/sample download +} cochran_config_t;
+typedef struct cochran_data_t { unsigned char id0[67]; unsigned char id[67]; unsigned char config[4][512]; @@ -56,38 +82,17 @@ typedef struct cochran_data_t { unsigned int config_count;
unsigned short int dive_count; - unsigned char fingerprint[2]; + unsigned char fingerprint[COCHRAN_FINGERPRINT_SIZE]; int fp_dive_num;
unsigned int logbook_size; - unsigned int current_sample_size;
unsigned int sample_data_offset; unsigned int sample_size; unsigned int last_interdive_offset; unsigned int last_entry_offset;
- unsigned char *current_fingerprint; - unsigned char *current_log; - unsigned char *current_sample; - - // Config items - int log_size; - int sample_memory_start_address; - int sample_memory_end_address; - int dive_num_ptr; - int dive_count_ptr; - int dive_count_endian; - int sample_end_ptr; - int log_pre_dive_ptr; - int log_end_dive_ptr; - int last_interdive_ptr; - int last_entry_ptr; - int date_format; - int address_length; - int high_baud; // baud rate to switch to for log/sample download - - unsigned char corrupt_dive; + cochran_config_t conf; } cochran_data_t;
typedef struct cochran_device_t { @@ -106,7 +111,7 @@ typedef struct cochran_device_t { #define CMD_DAY 2 #define CMD_MON 5 #define CMD_YEAR 4 -#define CME_START_OFFSET 6 // 4 bytes +#define CMD_START_OFFSET 6 // 4 bytes #define CMD_WATER_CONDUCTIVITY 24 // 1 byte, 0=low, 2=high #define CMD_START_TEMP 45 // 1 byte, F #define CMD_START_DEPTH 56 // 2 byte, /4=ft @@ -153,8 +158,8 @@ dc_status_t cochran_commander_device_set_fingerprint (dc_device_t *abstract, const unsigned char data[], unsigned int size); dc_status_t cochran_commander_device_read (dc_device_t *abstract, unsigned int address, unsigned char data[], unsigned int size); -static dc_status_t cochran_read_id (dc_device_t *abstract); dc_status_t cochran_commander_device_foreach (dc_device_t *abstract, dc_dive_callback_t callback, void *userdata); dc_status_t cochran_commander_device_dump (dc_device_t *abstract, dc_buffer_t *data); +void cochran_commander_get_config(const unsigned char *model, cochran_config_t *conf); diff --git a/src/cochran_commander_parser.c b/src/cochran_commander_parser.c index 547dad4..8db87cf 100644 --- a/src/cochran_commander_parser.c +++ b/src/cochran_commander_parser.c @@ -107,10 +107,13 @@ dc_status_t cochran_commander_parser_get_datetime (dc_parser_t *abstract, dc_datetime_t *datetime) { - cochran_data_t *data = (cochran_data_t *) abstract->data; - const unsigned char *log = data->current_log; + const unsigned char *data = abstract->data; + const unsigned char *log = data + COCHRAN_MODEL_SIZE; + cochran_config_t conf;
- if (data->date_format == COCHRAN_DATE_FORMAT_SMHDMY) { + cochran_commander_get_config(data, &conf); + + if (conf.date_format == COCHRAN_DATE_FORMAT_SMHDMY) { datetime->second = log[0]; datetime->minute = log[1]; datetime->hour = log[2]; @@ -133,9 +136,11 @@ static dc_status_t cochran_commander_parser_get_field (dc_parser_t *abstract, dc_field_type_t type, unsigned int flags, void *value) { - cochran_data_t *data = (cochran_data_t *) abstract->data; + cochran_config_t conf; + + cochran_commander_get_config(abstract->data, &conf);
- switch (data->model & 0xFF0000) + switch (conf.model & 0xFF0000) { case COCHRAN_MODEL_COMMANDER_FAMILY: return cochran_cmdr_parser_get_field(abstract, type, flags, value); @@ -144,15 +149,19 @@ cochran_commander_parser_get_field (dc_parser_t *abstract, dc_field_type_t type, return cochran_emc_parser_get_field(abstract, type, flags, value); break; } + + return DC_STATUS_UNSUPPORTED; }
static dc_status_t cochran_commander_parser_samples_foreach (dc_parser_t *abstract, dc_sample_callback_t callback, void *userdata) { - cochran_data_t *data = (cochran_data_t *) abstract->data; + cochran_config_t conf;
- switch (data->model & 0xFF0000) + cochran_commander_get_config(abstract->data, &conf); + + switch (conf.model & 0xFF0000) { case COCHRAN_MODEL_COMMANDER_FAMILY: return cochran_cmdr_parser_samples_foreach(abstract, callback, @@ -163,29 +172,37 @@ cochran_commander_parser_samples_foreach (dc_parser_t *abstract, userdata); break; } + + return DC_STATUS_UNSUPPORTED; }
+void +cochran_commander_get_event_info(const unsigned char code, + cochran_events_t *event) +{ + cochran_events_t *events = cochran_events; + unsigned char i = 0; + + while (events[i].code && events[i].code != code) + i++;
+ *event = events[i]; +}
int cochran_commander_handle_event (dc_parser_t *abstract, dc_sample_callback_t callback, void *userdata, unsigned char code, unsigned int offset, unsigned int time) { - cochran_data_t *data = (cochran_data_t *) abstract->data; - cochran_events_t *e = cochran_events; - dc_sample_value_t sample = {0}; + cochran_events_t event;
- unsigned char event_ptr = 0; - - while (e[event_ptr].code && e[event_ptr].code != code) - event_ptr++; + cochran_commander_get_event_info(code, &event);
sample.event.time = 0;
- if (e[event_ptr].code) { - switch (e[event_ptr].code) + if (event.code) { + switch (code) { case 0xAB: // Ceiling decrease // Indicated to lower ceiling by 10 ft (deeper) @@ -199,9 +216,9 @@ cochran_commander_handle_event (dc_parser_t *abstract, break; default: // Don't send known events of type NONE - if (! e[event_ptr].type == SAMPLE_EVENT_NONE) { - sample.event.type = e[event_ptr].type; - sample.event.flags = e[event_ptr].flag; + if (! event.type == SAMPLE_EVENT_NONE) { + sample.event.type = event.type; + sample.event.flags = event.flag; if (callback) callback (DC_SAMPLE_EVENT, sample, userdata); } } @@ -213,5 +230,5 @@ cochran_commander_handle_event (dc_parser_t *abstract, if (callback) callback (DC_SAMPLE_EVENT, sample, userdata); }
- return e[event_ptr].data_bytes; + return event.data_bytes; } diff --git a/src/cochran_commander_parser.h b/src/cochran_commander_parser.h index 314d538..44340b5 100644 --- a/src/cochran_commander_parser.h +++ b/src/cochran_commander_parser.h @@ -97,6 +97,8 @@ dc_status_t cochran_commander_parser_set_data (dc_parser_t *abstract, const unsigned char *data, unsigned int size); dc_status_t cochran_commander_parser_get_datetime (dc_parser_t *abstract, dc_datetime_t *datetime); +void cochran_commander_get_event_info(const unsigned char code, + cochran_events_t *event); int cochran_commander_handle_event (dc_parser_t *abstract, dc_sample_callback_t callback, void *userdata, unsigned char code, unsigned int offset, unsigned int time); diff --git a/src/cochran_emc_parser.c b/src/cochran_emc_parser.c index 8a554cb..5b4af81 100644 --- a/src/cochran_emc_parser.c +++ b/src/cochran_emc_parser.c @@ -52,13 +52,15 @@ dc_status_t cochran_emc_parser_get_field (dc_parser_t *abstract, dc_field_type_t type, unsigned int flags, void *value) { - cochran_data_t *data = (cochran_data_t *) abstract->data; - unsigned char *log = data->current_log; + const unsigned char *log = abstract->data + COCHRAN_MODEL_SIZE; + unsigned char corrupt_dive = 0; + + if (array_uint32_le(log + COCHRAN_EMC_LOG_SIZE / 2) == 0xFFFFFFFF) + corrupt_dive = 1;
dc_gasmix_t *gasmix = (dc_gasmix_t *) value; dc_salinity_t *water = (dc_salinity_t *) value;
- unsigned int dive_time; struct dive_stats stats;
if (value) { @@ -67,28 +69,30 @@ cochran_emc_parser_get_field (dc_parser_t *abstract, dc_field_type_t type, *((unsigned int*) value) = ((float) log[EMC_START_TEMP] - 32) / 1.8; break; case DC_FIELD_TEMPERATURE_MINIMUM: - if (data->corrupt_dive) { + if (corrupt_dive) { cochran_emc_parse_dive_stats(abstract, &stats); *((unsigned int*) value) = stats.min_temp; } else - *((unsigned int*) value) = ((float) log[EMC_MIN_TEMP] / 2 + 20 - 32) / 1.8; + *((unsigned int*) value) = ((float) log[EMC_MIN_TEMP] / 2 + + 20 - 32) / 1.8; break; case DC_FIELD_TEMPERATURE_MAXIMUM: - if (data->corrupt_dive) { + if (corrupt_dive) { cochran_emc_parse_dive_stats(abstract, &stats); *((unsigned int*) value) = stats.max_temp; } else - *((unsigned int*) value) = ((float) log[EMC_MAX_TEMP] / 2 + 20 - 32) / 1.8; + *((unsigned int*) value) = ((float) log[EMC_MAX_TEMP] / 2 + + 20 - 32) / 1.8; break; case DC_FIELD_DIVETIME: - if (data->corrupt_dive) { + if (corrupt_dive) { cochran_emc_parse_dive_stats(abstract, &stats); *((unsigned int*) value) = stats.dive_time; } else *((unsigned int *) value) = array_uint16_le (log + EMC_BT) * 60; break; case DC_FIELD_MAXDEPTH: - if (data->corrupt_dive) { + if (corrupt_dive) { cochran_emc_parse_dive_stats(abstract, &stats); *((unsigned int*) value) = stats.max_depth; } else @@ -96,7 +100,7 @@ cochran_emc_parser_get_field (dc_parser_t *abstract, dc_field_type_t type, * FEET; break; case DC_FIELD_AVGDEPTH: - if (data->corrupt_dive) { + if (corrupt_dive) { cochran_emc_parse_dive_stats(abstract, &stats); *((unsigned int*) value) = stats.avg_depth; } else @@ -107,16 +111,16 @@ cochran_emc_parser_get_field (dc_parser_t *abstract, dc_field_type_t type, *((unsigned int *) value) = 10; break; case DC_FIELD_GASMIX: - gasmix->oxygen = (double) array_uint16_le ((char *) log + gasmix->oxygen = (double) array_uint16_le (log + EMC_O2_PERCENT + 2 * flags) / 256 / 100; - gasmix->helium = (double) array_uint16_le ((char *) log + gasmix->helium = (double) array_uint16_le (log + EMC_HE_PERCENT + 2 * flags) / 256 / 100; gasmix->nitrogen = 1.0 - gasmix->oxygen - gasmix->helium; break; case DC_FIELD_SALINITY: // 0 = low conductivity, 2 = high, maybe there's a 1? - water->type = ( (log[EMC_WATER_CONDUCTIVITY] & 0x3) == 0 ? DC_WATER_FRESH - : DC_WATER_SALT ); + water->type = ( (log[EMC_WATER_CONDUCTIVITY] & 0x3) == 0 + ? DC_WATER_FRESH : DC_WATER_SALT ); water->density = 1000 + 12.5 * (log[EMC_WATER_CONDUCTIVITY] & 0x3); break; case DC_FIELD_ATMOSPHERIC: @@ -131,54 +135,17 @@ cochran_emc_parser_get_field (dc_parser_t *abstract, dc_field_type_t type, return DC_STATUS_SUCCESS; }
-/* -* For corrupt dives the end-of-samples pointer is 0xFFFFFFFF -* search for a reasonable size, e.g. using next dive start sample -* or end-of-samples to limit searching for recoverable samples -*/ -static unsigned int -cochran_emc_guess_sample_size(cochran_data_t *data) -{ - unsigned int size = 0; - unsigned int offset, next_offset, memory_start_offset, memory_end_offset; - - // Get memory boundries - memory_start_offset = array_uint32_le(data->config[0] - + data->sample_memory_start_address); - memory_end_offset = array_uint32_le(data->config[0] - + data->sample_memory_end_address); - - // see if there is a next sample - if (data->current_log - data->logbook + 512 >= data->logbook_size) { - // we are the last log entry, use maximum data - size = memory_end_offset - offset + 1; - } else { - offset = array_uint32_le(data->current_log + 6); - next_offset = array_uint32_le(data->current_log + 512 + 30); - - if (next_offset >= offset) { - size = next_offset - offset + 1; - } else { - // samples wrap to begin of memory - size = memory_end_offset - offset + next_offset - - memory_start_offset + 2; - } - } - - return (size); -} - -
dc_status_t cochran_emc_parser_samples_foreach (dc_parser_t *abstract, dc_sample_callback_t callback, void *userdata) { - cochran_data_t *data = (cochran_data_t *) abstract->data; - unsigned char *sdata = data->current_sample; - unsigned char *log = data->current_log; - unsigned int size = data->current_sample_size; - unsigned char *s; + const unsigned char *data = abstract->data; + const unsigned char *log = data + COCHRAN_MODEL_SIZE; + const unsigned char *samples = log + COCHRAN_EMC_LOG_SIZE; + unsigned int size = abstract->size - COCHRAN_MODEL_SIZE + - COCHRAN_EMC_LOG_SIZE; + const unsigned char *s;
dc_sample_value_t sample = {0}, empty_sample = {0}; unsigned int time = 0, last_sample_time; @@ -188,18 +155,18 @@ cochran_emc_parser_samples_foreach (dc_parser_t *abstract, double ascent_rate; unsigned char deco_obligation = 0; unsigned int deco_ceiling = 0; - unsigned int deco_time = 0; + unsigned int deco_time; + unsigned char corrupt_dive = 0;
- if (data->corrupt_dive) { - // Size of sample data is wrong, make a guess - size = cochran_emc_guess_sample_size(data); - } + + if (array_uint32_le(log + COCHRAN_EMC_LOG_SIZE / 2) == 0xFFFFFFFF) + corrupt_dive = 1;
/* * Cochran samples depth every second and varies between ascent rate * and temp ever other second. * In the 21st, 22nd, 23rd, 24th samples are NDL remaining, - *deco time and ceiling + * deco time and ceiling */
// Prime with values from the dive log section @@ -224,10 +191,10 @@ cochran_emc_parser_samples_foreach (dc_parser_t *abstract, callback (DC_SAMPLE_TIME, sample, userdata); }
- s = sdata + offset; + s = samples + offset;
// If corrupt_dive end before offset - if (data->corrupt_dive) { + if (corrupt_dive) { if ( s[0] == 0x10 || s[0] == 0xFF || s[0] == 0xA8) { @@ -239,8 +206,8 @@ cochran_emc_parser_samples_foreach (dc_parser_t *abstract,
// Check for event if (s[0] & 0x80) { - offset += cochran_commander_handle_event(abstract, callback, userdata, - s[0], offset, time); + offset += cochran_commander_handle_event(abstract, callback, + userdata, s[0], offset, time); switch (s[0]) { case 0xC5: // Deco obligation begins @@ -262,7 +229,7 @@ cochran_emc_parser_samples_foreach (dc_parser_t *abstract,
sample.deco.type = DC_DECO_DECOSTOP; sample.deco.depth = deco_ceiling * FEET; - deco_time = (array_uint16_le(s + 3) + 1) * 60; + sample.deco.time = (array_uint16_le(s + 3) + 1) * 60; if (callback) callback(DC_SAMPLE_DECO, sample, userdata); break; } @@ -270,7 +237,7 @@ cochran_emc_parser_samples_foreach (dc_parser_t *abstract, continue; }
- + // Depth is logged as change in feet, bit 0x40 means negative depth depth += (float) (s[0] & 0x3F) / 4 * (s[0] & 0x40 ? -1 : 1); sample.depth = depth * FEET; @@ -290,6 +257,17 @@ cochran_emc_parser_samples_foreach (dc_parser_t *abstract, if (callback) callback (DC_SAMPLE_TEMPERATURE, sample, userdata); }
+ + // Some samples are split over two seconds + // Skip over event bytes to find the next sample + const unsigned char *n = s + COCHRAN_EMC_SAMPLE_SIZE; + cochran_events_t event; + + while ((*n & 0x80) && n < s + size) { + cochran_commander_get_event_info(*n, &event); + n += event.data_bytes; + } + // Tissue load is in 20 samples, then 2 samples for RBT, // and 2 for ceiling switch (time % 24) @@ -297,13 +275,14 @@ cochran_emc_parser_samples_foreach (dc_parser_t *abstract, case 20: if (deco_obligation) { /* Deco time for deepest stop, unused */ - deco_time = (s[2] + s[5] * 256 + 1) * 60; + deco_time = (s[2] + n[2] * 256 + 1) * 60; } else { /* Get RBT */ - sample.rbt = s[2] + s[5] * 256 + 1; + sample.rbt = s[2] + n[2] * 256 + 1; if (callback) callback (DC_SAMPLE_RBT, sample, userdata); /* Send deco NDL sample */ - sample.deco.time = sample.rbt * 60; // seconds + sample.deco.type = DC_DECO_NDL; + sample.deco.time = (s[2] + n[2] * 256 + 1) * 60; // seconds sample.deco.depth = 0; if (callback) callback (DC_SAMPLE_DECO, sample, userdata); } @@ -313,14 +292,14 @@ cochran_emc_parser_samples_foreach (dc_parser_t *abstract, if (deco_obligation) { sample.deco.type = DC_DECO_DECOSTOP; sample.deco.depth = deco_ceiling * FEET; - sample.deco.time = (s[2] + s[5] * 256 + 1) * 60; // minutes + sample.deco.time = (s[2] + n[2] * 256 + 1) * 60; // minutes if (callback) callback (DC_SAMPLE_DECO, sample, userdata); } break; }
time ++; - offset += 3; + offset += COCHRAN_EMC_SAMPLE_SIZE; }
return DC_STATUS_SUCCESS; @@ -329,51 +308,50 @@ cochran_emc_parser_samples_foreach (dc_parser_t *abstract, void cochran_emc_parse_dive_stats (dc_parser_t *abstract, struct dive_stats *stats) { - cochran_data_t *data = (cochran_data_t *) abstract->data; - unsigned char *sdata = data->current_sample; - unsigned char *log = data->current_log; - unsigned int size = data->current_sample_size; - unsigned char *s; + const unsigned char *log = abstract->data + COCHRAN_MODEL_SIZE; + const unsigned char *samples = log + COCHRAN_EMC_LOG_SIZE; + unsigned int size = abstract->size - COCHRAN_MODEL_SIZE + - COCHRAN_EMC_LOG_SIZE; + const unsigned char *s; + unsigned int offset = 0; - cochran_events_t *e = cochran_events; - unsigned char event_ptr; float depth_m = 0, depth, temp; unsigned int time = 0; + unsigned char corrupt_dive = 0;
- // Check for corrupt dive - if (data->corrupt_dive) - { - // It's corrupt, guess at the sample size - size = cochran_emc_guess_sample_size(data); - } + if (array_uint32_le(log + COCHRAN_EMC_LOG_SIZE / 2) == 0xFFFFFFFF) + corrupt_dive = 1; + + // Prime with values from the dive log section + depth = array_uint16_le (log + EMC_START_DEPTH) / 256;
- stats->max_depth = 0; + stats->max_depth = depth; stats->min_temp = 999; stats->avg_depth = 0; stats->max_temp = 0;
while (offset < size) { - s = sdata + offset; + s = samples + offset;
- if ( s[0] == 0x10 - || s[0] == 0xFF - || s[0] == 0xA8) { + if (corrupt_dive) { + if (s[0] == 0x10 + || s[0] == 0xFF + || s[0] == 0xA8) { // End corrupted dive break; + } + if (time > 1 && (s[0] == 0xE3 || s[0] == 0xF3)) + break; }
- if (time > 1 && (s[0] == 0xE3 || s[0] == 0xF3)) - break; - // Check for event if (s[0] & 0x80) { - event_ptr = 0; - while (e[event_ptr].code && e[event_ptr].code != s[0]) - event_ptr++; + cochran_events_t event; + cochran_commander_get_event_info(s[0], &event);
// Advance some bytes - if (e[event_ptr].code) - offset += e[event_ptr].data_bytes; + if (event.code) + offset += event.data_bytes; else offset ++;
@@ -390,7 +368,7 @@ void cochran_emc_parse_dive_stats (dc_parser_t *abstract,
if (time % 2 != 0) { // Temperature logged in half degrees F above 20 - temp = s[1] / 2 + 20; + temp = (float) s[1] / 2 + 20; temp = (temp - 32.0) / 1.8;
if (temp < stats->min_temp) stats->min_temp = temp; @@ -398,7 +376,7 @@ void cochran_emc_parse_dive_stats (dc_parser_t *abstract, }
time ++; - offset += 3; + offset += COCHRAN_EMC_SAMPLE_SIZE; }
stats->dive_time = time - 1;
John,
I had another look at your latest code today. Armed with your documentation, I tried to understand the logic behind it. I have some comments and questions for you:
The split between the config and data is already a nice improvement. There is still room for improvement in the sense that the config should be all constant and not based on the input data (e.g. the sample_memory_end_address field).
There is a nasty bug in the cochran_commander_serial_setup() function. If there is an error, you close the serial port and free the device structure. But this function is called from several places, without checking the return value. You need to refactor this function to not free anything.
(You also don't need to pass the dc_context_t pointer as a parameter, because it's available from the device struct.)
The cochran_commander_serial_open() function is similar. Although it's not really a problem here, it's confusing that it frees the device struct. I would remove this function, and call serial_open() directly in the cochran_commander_device_open() function.
The remainder of this email is mostly some questions and comments on the data format an how you handle that in the code:
With the exception of the id, config and misc packets, the protocol is mainly based on simple random access to the internal memory. There is no dedicated command to read the logbook and samples. They are simply stored in a certain area of the internal memory. So for the dump api it makes much more sense to implement as full memory dump, instead of reading just two "random" pieces of the memory. The result is that the dump would be much more generic, because you no longer need to interpret the data (in order to find the logbook and samples). This is important when you need the memory dump to diagnose bugs in the logic for downloading the dives. The fact that we don't seem to know the total amount of memory, might be a problem here.
The next thing where I have some concerns, is the logic how you find the logbook and samples. It seems we only know the start address of the logbook and sample ringbuffers (respectively 0 and 0x094000 for the emc), but not the end address. And that results in a few issues:
For the logbook, you assume the first logbook entry is at the address 0, and then move forward until you reach the total number of dives. But in practice, the number of dives that can be stored is not unlimited, and the memory is most likely used in a circular way. Thus, once you have enough dives, this assumption will no longer be correct. Ideally we need to figure out the end of the ringbuffer, so we can take this into account correctly. If we are unable to find this address, then we need at least some checks to verify our assumptions (for example check that the dive number starts at zero and always increases, check that we don't go past the sample area, etc). If you ask me, returning an error is always better than silently returning some wrong data.
A similar problem exist when reading the samples. You have some code that tries to guess where the ringbuffer ends. But there is absolutely no guarantee that those assumptions are correct. I think it's much safer to not guess at all and just fail if the ringbuffer wrap point is crossed (e.g. begin address is higher than the end address). Once someone hits this error, we can investigate and determine the correct ringbuffer end address. If we're guessing, and get it wrong, then we're silently returning bogus data. That's not good. And the chance that your guess is wrong is pretty high. Although I agree that the total memory is likely nicely aligned, the sample ringbuffer can end anywhere. I've seen that more than once in other backends.
[One method that I have used successfully in the past to locate the ringbuffer end, without having access to data that crosses it, is by looking at the full memory dump. Usually unused data in flash memory contains all 0xFF. So if there is another block located right after the ringbuffer area, then the unused tail of the ringbuffer will contain 0xFF, followed by some non 0xFF data from that other block. Thus the first address containing non 0xFF bytes is very likely the end of the ringbuffer.]
The next big question is where does the sample data begins/ends? So far you have identified three pointers: pre, begin and end address. When I dump those pointers (see below), I noticed something interesting:
* The 5th column is the difference between begin and pre. According to your documentation, these are inter dive events. They can be zero length too. Should we consider those as part of the dive, or ignore them? Since I don't know what's in those interdive events, I have no idea.
* The 6th column is the difference between pre and end of the previous entry. This is nearly always equal to 1800. I doubt that's a coincidence. I guess that means there is some 1800 byte header or trailer. Do you have any idea what's in there?
* When the end pointer is ffffffff, the next entry has the same pre pointer. Because it makes no sense that the sample data for two different dives start at the same address, I assume this means that for some reason (e.g. battery empty, too short duration, etc) this dive has no sample data. What you did in this case is take the data until the start of the next dive. But if I'm right, that's the inter dive events. So this makes no sense to me. Do you get anything useful from doing this?
I also tried to parse the test data I received from you, but I can't get anything useful out of it. That might be due to the fact that I don't know at which address the sample data was read when you created that data (that might have changed in your latest code). Btw, that's exactly why I prefer a more generic dump format as explained above. Can you provide some new data for me, or the correct address? I also tried to run the parser under valgrind, and it reports a *huge* number of problems. That might be due to this mismatch in data, but the parser should be robust against whatever you throw at it. The rule of thumb is never trust the input data, and always check everything before you use it. I didn't investigate further, because I first want to confirm with good input :-)
Jef
i | pre | begin | end | 5th | 6th ========================================== 0 00094000 00094834 00094a95 2100 606208 1 0009519d 0009519d 0009674b 0 1800 2 00096e53 00096e7d 000980c9 42 1800 3 000987d1 00098aa9 ffffffff 728 1800 4 000987d1 00098e3c 0009ac28 1643 624594 5 0009b330 0009b330 ffffffff 0 1800 6 0009b330 0009b43c 0009c2c4 268 635697 7 0009c9cc 0009ca46 0009e898 122 1800 8 0009efa0 0009f01d 000a07f4 125 1800 9 000a0efc 000a0f39 000a2d60 61 1800 10 000a3468 000a34a6 000a4f2a 62 1800 11 000a5632 000a5647 000a7207 21 1800 12 000a790f 000a7a3c ffffffff 301 1800 13 000a790f 000a8e3c ffffffff 5421 686352 14 000a790f 000a983c 000aa9e6 7981 686352 15 000ab0ee 000ab26c 000ab40a 382 1800 16 000abb12 000abb12 000abdc5 0 1800 17 000ac4cd 000ac4cd 000ad6c9 0 1800 18 000addd1 000ade4c 000ae5d3 123 1800 19 000aecdb 000aecdb 000b06cd 0 1800 20 000b0dd5 000b0e69 000b40aa 148 1800 21 000b47b2 000b481e ffffffff 108 1800 22 000b47b2 000b5466 000b70c3 3252 739251 23 000b77cb 000b77df 000b9d11 20 1800 24 000ba419 000ba469 000bda64 80 1800 25 000be16c 000be217 000c00ac 171 1800 26 000c07b4 000c084a 000c29ed 150 1800 27 000c30f5 000c34d6 000c4a9f 993 1800 28 000c51a7 000c51a7 000c6449 0 1800 29 000c6b51 000c6e74 000c6e7d 803 1800 30 000c7585 000c79ff 000c9d39 1146 1800 31 000ca441 000ca47f 000cc82d 62 1800 32 000ccf35 000ccf73 000ce838 62 1800 33 000cef40 000cefa7 000d15c7 103 1800 34 000d1ccf 000d1cf8 000d47ed 41 1800 35 000d4ef5 000d4f09 000d77c4 20 1800 36 000d7ecc 000d7ee0 000da532 20 1800 37 000dac3a 000dac4e 000dd172 20 1800 38 000dd87a 000dd88e 000df0bd 20 1800 39 000df7c5 000df7ee 000e2f19 41 1800 40 000e3621 000e364a 000e5730 41 1800 41 000e5e38 000e5e4c 000e880c 20 1800 42 000e8f14 000e8f28 000eb7cf 20 1800 43 000ebed7 000ebeeb 000eee22 20 1800 44 000ef52f 000ef543 000f1e4f 20 1805 45 000f255b 000f2725 000f2761 458 1804 46 000f2e69 000f2e69 000f599f 0 1800 47 000f60a7 000f60f9 000f8e7f 82 1800 48 000f9587 000f95b0 000fb7e2 41 1800 49 000fbeea 000fbfd8 000febe4 238 1800 50 000ff2ec 000ff397 00100e44 171 1800 51 0010154c 0010154c 0010160c 0 1800 52 00101d14 00101dd9 00103a7e 197 1800 53 00104186 001041f2 00105a4a 108 1800 54 00106152 00106167 00107b79 21 1800 55 00108281 001083d0 0010b8cc 335 1800 56 0010bfd4 0010c012 0010f0dc 62 1800 57 0010f7e4 0010f7f8 00111a89 20 1800 58 00112191 001121cf 00114e23 62 1800 59 0011552b 0011553f 00118412 20 1800 60 00118b1a 00118b58 0011b9a5 62 1800 61 0011c0ad 0011c0c1 0011e8ab 20 1800 62 0011efb3 0011efdc 00121e25 41 1800 63 0012252d 00122541 00124dc4 20 1800 64 001254cc 001254e0 00128055 20 1800 65 0012875d 00128771 0012b036 20 1800 66 0012b73e 0012b767 0012e4ba 41 1800 67 0012ebc2 0012ebd6 00130d22 20 1800 68 0013142a 0013143e 00133d2f 20 1800 69 00134437 00134437 00136ef7 0 1800 70 001375ff 00137628 0013912e 41 1800 71 00139836 0013984a 0013c3f2 20 1800 72 0013cafa 0013cdbc 00140ab6 706 1800 73 001411be 00141269 00143321 171 1800 74 00143a29 00143a29 001455fa 0 1800 75 00145d02 00145d42 00148ae2 64 1800 76 001491ea 0014933f 0014a805 341 1800 77 0014af0d 0014af0d 0014b1eb 0 1800 78 0014b8f3 0014b94a 0014ef9d 87 1800 79 0014f6a5 0014f70d ffffffff 104 1800 80 0014f6a5 0015043c 00151442 3479 1373862 81 00151b4a 00151b4a 00153815 0 1800 82 00153f1d 00153fb3 ffffffff 150 1800 83 00153f1d 0015643c ffffffff 9503 1392414 84 00153f1d 0015663c ffffffff 10015 1392414 85 00153f1d 001566e0 ffffffff 10179 1392414 86 00153f1d 0015943c ffffffff 21791 1392414 87 00153f1d 0015983c 0015a55f 22815 1392414 88 0015ac67 0015ad0e ffffffff 167 1800 89 0015ac67 0015ca3c 0015d1c0 7637 1420392 90 0015d8c8 0015d8f1 0015fa0c 41 1800 91 00160114 00160129 ffffffff 21 1800 92 00160114 0016043c ffffffff 808 1442069 93 00160114 00161c3c 001620bf 6952 1442069 94 001627c7 001627db ffffffff 20 1800 95 001627c7 0016447b ffffffff 7348 1451976 96 001627c7 0016463c 001661b3 7797 1451976 97 001668bb 0016694d 00167a66 146 1800 98 0016816e 0016853f 00169fa2 977 1800 99 0016a6aa 0016a6aa 0016c07d 0 1800 100 0016c785 0016cae1 0016f7b4 860 1800 101 0016febc 0016fee5 00172dee 41 1800 102 001734f6 0017350a 00175cdb 20 1800 103 001763e3 0017640c 00177487 41 1800 104 00177b8f 00177bd1 00179239 66 1800 105 00179941 00179955 0017aae1 20 1800 106 0017b1e9 0017b212 0017c680 41 1800 107 0017cd88 0017ce2f 0017fbce 167 1800 108 001802d6 001802ff 00182cfe 41 1800 109 00183406 0018342f 0018591f 41 1800 110 00186027 00186066 0018821d 63 1800 111 00188925 001889a2 0018b8a6 125 1800 112 0018bfae 0018c452 0018d833 1188 1800 113 0018df3b 0018df8e 00190d5f 83 1800
On Sun, Dec 7, 2014 at 3:14 PM, Jef Driesen jef@libdivecomputer.org wrote:
John,
I had another look at your latest code today. Armed with your documentation, I tried to understand the logic behind it. I have some comments and questions for you:
The split between the config and data is already a nice improvement. There is still room for improvement in the sense that the config should be all constant and not based on the input data (e.g. the sample_memory_end_address field).
There is a nasty bug in the cochran_commander_serial_setup() function. If there is an error, you close the serial port and free the device structure. But this function is called from several places, without checking the return value. You need to refactor this function to not free anything.
(You also don't need to pass the dc_context_t pointer as a parameter, because it's available from the device struct.)
The cochran_commander_serial_open() function is similar. Although it's not really a problem here, it's confusing that it frees the device struct. I would remove this function, and call serial_open() directly in the cochran_commander_device_open() function.
I seperated those when I thought I had to close and re-open the port to
move from high-baud back to low. At the time i was still simulating the Analyst software. I can recombine them.
The remainder of this email is mostly some questions and comments on the data format an how you handle that in the code:
With the exception of the id, config and misc packets, the protocol is mainly based on simple random access to the internal memory. There is no dedicated command to read the logbook and samples. They are simply stored in a certain area of the internal memory. So for the dump api it makes much more sense to implement as full memory dump, instead of reading just two "random" pieces of the memory. The result is that the dump would be much more generic, because you no longer need to interpret the data (in order to find the logbook and samples). This is important when you need the memory dump to diagnose bugs in the logic for downloading the dives. The fact that we don't seem to know the total amount of memory, might be a problem here.
What I find interesting is that logbook data is at address 0 (for the read command) and that the sample data read is far enough along to justify the config pages and maybe some program data, although I have no idea how large the program for this could be. GIven the background of Mike Cochran I suspect it's tight code.
So for the EMC 16 and 20H the logbook is from 0 to 0x80000 (1024 entries) then samples begine at 0x94000. I haven't looked to see what the 0x14000 bytes between the logbook and sample are, I suspect config and program data, although I'm surprised that doesn't start a 0.
Now for the EMC 20H the quoted maximum sample data is 1450 hours. 3 byte samples * 60 seconds * 60 minutes * 1450 hours is 0xEEF3E0, or close to 16MB, when added to the 0x94000 base. It's short but I suspect that 1450 is a rounded number.
To add to this, EMC models are sold with different amounts of memory configured. Word from the dealer is that it's configurable via software. That means each 20H is shipped with the same hardware configuration and that something in one of the other data blocks indicates how much memory is enabled. I've been unable to find where that might be and I'm worried it's stored as a feature code instead of simply the amount of memory.
The next thing where I have some concerns, is the logic how you find the logbook and samples. It seems we only know the start address of the logbook and sample ringbuffers (respectively 0 and 0x094000 for the emc), but not the end address. And that results in a few issues:
We don't really know the start. I just worked with my first EMC-14 and its samples start at 0x20000, mostly because it supports only 256 logs.
For the logbook, you assume the first logbook entry is at the address 0, and then move forward until you reach the total number of dives. But in practice, the number of dives that can be stored is not unlimited, and the memory is most likely used in a circular way. Thus, once you have enough dives, this assumption will no longer be correct. Ideally we need to figure out the end of the ringbuffer, so we can take this into account correctly. If we are unable to find this address, then we need at least some checks to verify our assumptions (for example check that the dive number starts at zero and always increases, check that we don't go past the sample area, etc). If you ask me, returning an error is always better than silently returning some wrong data.
The number of log book entries is also configured via software. In other words the DC can be configured to ignore part of the logbook ringbuffer. It might be a head/tail pointer or something.
You can see that the number of dives is available in the config data block. I suspect that a starting dive number is also present but since it's set at 00 00 now it can't be identified. Also because of the insane number of dives this computer will log it would take me 8 years to wrap mine.
Checking dive numbers will actually work for most divers. I suspect that all but a few will never wrap the ring buffer before changing DCs.
A similar problem exist when reading the samples. You have some code that tries to guess where the ringbuffer ends. But there is absolutely no guarantee that those assumptions are correct. I think it's much safer to not guess at all and just fail if the ringbuffer wrap point is crossed (e.g. begin address is higher than the end address). Once someone hits this error, we can investigate and determine the correct ringbuffer end address. If we're guessing, and get it wrong, then we're silently returning bogus data. That's not good. And the chance that your guess is wrong is pretty high. Although I agree that the total memory is likely nicely aligned, the sample ringbuffer can end anywhere. I've seen that more than once in other backends.
It's more likely that a ring buffer will wrap than the logbook. I have a Commander that has already wrapped (I bought it used) and so I have a working example. From USB traffic I could see the addresses used by Analyst and it's a nice clean alignment. I have no evidence on an EMC but it seems safe to assume it's a minor upgrade on the Commander software.
[One method that I have used successfully in the past to locate the ringbuffer end, without having access to data that crosses it, is by looking at the full memory dump. Usually unused data in flash memory contains all 0xFF. So if there is another block located right after the ringbuffer area, then the unused tail of the ringbuffer will contain 0xFF, followed by some non 0xFF data from that other block. Thus the first address containing non 0xFF bytes is very likely the end of the ringbuffer.]
There are three problems with that. First, for the EMC 20H, that's 16 MB of data and even at 800K baud it's going to be a while to download. Probing might make more sense. Reading 1K at certain points as a test.
Second, since the sample ring buffer size is software configurable the end may not be apparent. I expect the end of the buffer is truncated on a DC configured with less memory. That means that the technique of probing will probably return a larger size than it should.
Third, I suspect that the end of sample memory is the end of physical memory. It's a guess based on the number of quoted number of samples supported and the starting address of samples. I don't know what will happen if we read beyond physical memory.
The next big question is where does the sample data begins/ends? So far you have identified three pointers: pre, begin and end address. When I dump those pointers (see below), I noticed something interesting:
- The 5th column is the difference between begin and pre. According to
your documentation, these are inter dive events. They can be zero length too. Should we consider those as part of the dive, or ignore them? Since I don't know what's in those interdive events, I have no idea.
- The 6th column is the difference between pre and end of the previous
entry. This is nearly always equal to 1800. I doubt that's a coincidence. I guess that means there is some 1800 byte header or trailer. Do you have any idea what's in there?
- When the end pointer is ffffffff, the next entry has the same pre
pointer. Because it makes no sense that the sample data for two different dives start at the same address, I assume this means that for some reason (e.g. battery empty, too short duration, etc) this dive has no sample data. What you did in this case is take the data until the start of the next dive. But if I'm right, that's the inter dive events. So this makes no sense to me. Do you get anything useful from doing this?
Pre-dive events consist of the following (and more) according to documentation.
- Initialization of the unit. - The unit is turned on - Low batteries - Altitude Changes of over 500 feet - Temperature Changes of 10 degrees - Sensor Malfunction - Analyst ® P.C. Communication
Interdive events like the above are multiple bytes in length, up to 20 bytes. The DC has no on or off switch. It turns on under certain conditions and off after a certain amount of off-gassing after a dive. So many dives will have at least an On event occur before dives. The first dive has a lot of events because I was configuring the device via Analyst and each config change seems to be logged.
The DC also stays on after a dive and logs off-gassing. I'd bet that's the 1800 bytes of logs after. Although I assumed it was longer. Off gassing samples are recognizable by the depth change and ascent rate samples being 0x40 and 0x80 respectively (these decode to negative zero.)
A pointer of FFFFFFFF means the end of dive was never logged. I had a problem with my DC where it would reboot not logging the end of dive. The code tries to salvage the dive logs.
I also tried to parse the test data I received from you, but I can't get anything useful out of it. That might be due to the fact that I don't know at which address the sample data was read when you created that data (that might have changed in your latest code). Btw, that's exactly why I prefer a more generic dump format as explained above. Can you provide some new data for me, or the correct address? I also tried to run the parser under valgrind, and it reports a *huge* number of problems. That might be due to this mismatch in data, but the parser should be robust against whatever you throw at it. The rule of thumb is never trust the input data, and always check everything before you use it. I didn't investigate further, because I first want to confirm with good input :-)
As you can see from the source, the reads are based on data from the config blocks and from the logbook. Config tells me how many logs to read. Reading the logbook for pointers results in knowing which memory addresses to read.
For an EMC 20H the sample data starts at 0x94000. It's been a while since I checked that the code is actually reading from that point, but it does round down 16k.
I've not tried to read the data, like you have. My simulator used earlier files with names that included their memory addresses. From a glance the data looks good.
But let's go through an example. I'm using hexdump to view the files and doing this manually.
Take dive 45 from your list. The log entry can be found at 0x5A00 in the logbook.bin file and we can see the pre-pointer of 0xf255b, sample ptr of 0xf2725 and end ptr of 0xf2761. When I subtract 0x94000 from those I get 0x5e55b, 0x5e725 and 0x5e761. So I search for 5e550 in the the hexdump to get to the right place in the output. You can see 22 inter-dive events starting a 0x5e55b. The events are:
02 81 e3 7e 2a 0b 0a 10 04 08 0e fd 02 00 00 0e 00 09 52 00 10 80 15 7f 2a 1e 2b 13 04 08 0e da 02 00 00 0e 00 6f 5e 6c 54 10 00 7e 7f 2a 16 09 03 05 08 0e d9 02 00 00 0e 00 6e 54 6f 5e 01 40 0c 81 2a 22 1c 07 06 08 0e d8 02 00 00 0f 02 bb 4c 00 00 0f 02 01 80 0d 81 2a 36 21 07 06 08 0e d8 02 00 00 0c 04 b2 4c 0f 02 0c 04 01 40 1e 81 2a 16 2d 08 06 08 0e d8 02 00 00 13 06 dc 4b 0c 04 13 06 01 00 25 81 2a 0a 0e 09 06 08 0e d8 02 00 00 1b 04 62 4b 13 06 8b 06 01 c0 30 81 2a 12 04 0a 06 08 0e d8 02 00 00 1e 02 b1 4a 1b 04 8b 06 01 80 35 81 2a 22 18 0a 06 08 0e d8 02 00 00 00 00 7b 4a 1e 02 8b 06 10 80 39 81 2a 26 29 0a 06 08 0e d8 02 00 00 ff 00 6d 4a 6e 54 03 6b 7a 82 2a 35 1e 09 07 08 0e d8 02 00 00 ff 00 c8 48 02 f9 7d 82 2a 03 2e 09 07 08 0e fd 02 00 00 9b 00 14 48 00 02 12 bf 82 2a 30 17 0e 07 08 0e fd 02 00 00 00 00 a3 4a 00 03 25 bf 82 2a 07 18 0e 07 08 0e da 02 00 00 00 00 c3 4a 03 29 cf 82 2a 1b 20 0f 07 08 0e d8 02 00 00 00 00 ba 51 02 b1 d1 82 2a 0f 2b 0f 07 08 0e fd 02 00 00 38 00 ea 50 00 02 1d 09 83 2a 2b 27 13 07 08 0e ff 02 00 00 00 00 5b 4e 00 03 2d 09 83 2a 3b 27 13 07 08 0e e1 02 00 00 00 00 59 4e 03 02 0b 83 2a 30 2f 13 07 08 0e d8 02 00 00 46 00 a4 4e 10 00 6e 83 2a 0a 32 02 08 08 0e d6 02 00 00 71 00 67 40 6d 4a 10 e8 10 84 2a 0e 19 0e 08 08 0e fc 02 00 00 00 00 8a 52 67 40 02 e8 10 84 2a 0e 19 0e 08 08 0e fc 02 00 00 00 00 8a 52 00
You can see in the above columns 2 through 5 is a 4 byte time stamp (seconds since Jan 1, 1992). It's a consistent enough number that it helps to manually delineate pre-dive events.
The dive starts. Often (I suspect always when the EMC is in FO2 mode, i.e. open circuit) the dive starts with *dive events* e3 f3. A starting depth change sample of 0x40 is also a hint to the start of a dive. Hints like this are only used by the code that tried to read bad dives.
Fast forwarding to the end of the dive, I jump to 5e761 in the file and just before that you'll see dive events e1 c0, surface and switch-to-air events. Following that is off-gassing samples until the begin of the next dive's pre-dive events (if any). Maybe this dive was setting a dive flag for a training session. Maybe the off-gassing time of 10 minutes (1800 bytes) is based on the configured SIT interval after which a new dive can start.
I've been hesitant in reading the entire memory of the device. I may try with my used device, which is a Commander.
I've been holding off asking Cochran again for help. They previously refused citing concern that open source code would reveal too much about their algorithm. Now that the code exists I wonder if they would be willing to help by telling me how to determine model and memory configuration, or if they will demand that I stop.. It's also the same reason I've been trying to respect their privacy by not reading anything but my data.
Jef
i | pre | begin | end | 5th | 6th
0 00094000 00094834 00094a95 2100 606208 1 0009519d 0009519d 0009674b 0 1800 2 00096e53 00096e7d 000980c9 42 1800 3 000987d1 00098aa9 ffffffff 728 1800 4 000987d1 00098e3c 0009ac28 1643 624594 5 0009b330 0009b330 ffffffff 0 1800 6 0009b330 0009b43c 0009c2c4 268 635697 7 0009c9cc 0009ca46 0009e898 122 1800 8 0009efa0 0009f01d 000a07f4 125 1800 9 000a0efc 000a0f39 000a2d60 61 1800 10 000a3468 000a34a6 000a4f2a 62 1800 11 000a5632 000a5647 000a7207 21 1800 12 000a790f 000a7a3c ffffffff 301 1800 13 000a790f 000a8e3c ffffffff 5421 686352 14 000a790f 000a983c 000aa9e6 7981 686352 15 000ab0ee 000ab26c 000ab40a 382 1800 16 000abb12 000abb12 000abdc5 0 1800 17 000ac4cd 000ac4cd 000ad6c9 0 1800 18 000addd1 000ade4c 000ae5d3 123 1800 19 000aecdb 000aecdb 000b06cd 0 1800 20 000b0dd5 000b0e69 000b40aa 148 1800 21 000b47b2 000b481e ffffffff 108 1800 22 000b47b2 000b5466 000b70c3 3252 739251 23 000b77cb 000b77df 000b9d11 20 1800 24 000ba419 000ba469 000bda64 80 1800 25 000be16c 000be217 000c00ac 171 1800 26 000c07b4 000c084a 000c29ed 150 1800 27 000c30f5 000c34d6 000c4a9f 993 1800 28 000c51a7 000c51a7 000c6449 0 1800 29 000c6b51 000c6e74 000c6e7d 803 1800 30 000c7585 000c79ff 000c9d39 1146 1800 31 000ca441 000ca47f 000cc82d 62 1800 32 000ccf35 000ccf73 000ce838 62 1800 33 000cef40 000cefa7 000d15c7 103 1800 34 000d1ccf 000d1cf8 000d47ed 41 1800 35 000d4ef5 000d4f09 000d77c4 20 1800 36 000d7ecc 000d7ee0 000da532 20 1800 37 000dac3a 000dac4e 000dd172 20 1800 38 000dd87a 000dd88e 000df0bd 20 1800 39 000df7c5 000df7ee 000e2f19 41 1800 40 000e3621 000e364a 000e5730 41 1800 41 000e5e38 000e5e4c 000e880c 20 1800 42 000e8f14 000e8f28 000eb7cf 20 1800 43 000ebed7 000ebeeb 000eee22 20 1800 44 000ef52f 000ef543 000f1e4f 20 1805 45 000f255b 000f2725 000f2761 458 1804 46 000f2e69 000f2e69 000f599f 0 1800 47 000f60a7 000f60f9 000f8e7f 82 1800 48 000f9587 000f95b0 000fb7e2 41 1800 49 000fbeea 000fbfd8 000febe4 238 1800 50 000ff2ec 000ff397 00100e44 171 1800 51 0010154c 0010154c 0010160c 0 1800 52 00101d14 00101dd9 00103a7e 197 1800 53 00104186 001041f2 00105a4a 108 1800 54 00106152 00106167 00107b79 21 1800 55 00108281 001083d0 0010b8cc 335 1800 56 0010bfd4 0010c012 0010f0dc 62 1800 57 0010f7e4 0010f7f8 00111a89 20 1800 58 00112191 001121cf 00114e23 62 1800 59 0011552b 0011553f 00118412 20 1800 60 00118b1a 00118b58 0011b9a5 62 1800 61 0011c0ad 0011c0c1 0011e8ab 20 1800 62 0011efb3 0011efdc 00121e25 41 1800 63 0012252d 00122541 00124dc4 20 1800 64 001254cc 001254e0 00128055 20 1800 65 0012875d 00128771 0012b036 20 1800 66 0012b73e 0012b767 0012e4ba 41 1800 67 0012ebc2 0012ebd6 00130d22 20 1800 68 0013142a 0013143e 00133d2f 20 1800 69 00134437 00134437 00136ef7 0 1800 70 001375ff 00137628 0013912e 41 1800 71 00139836 0013984a 0013c3f2 20 1800 72 0013cafa 0013cdbc 00140ab6 706 1800 73 001411be 00141269 00143321 171 1800 74 00143a29 00143a29 001455fa 0 1800 75 00145d02 00145d42 00148ae2 64 1800 76 001491ea 0014933f 0014a805 341 1800 77 0014af0d 0014af0d 0014b1eb 0 1800 78 0014b8f3 0014b94a 0014ef9d 87 1800 79 0014f6a5 0014f70d ffffffff 104 1800 80 0014f6a5 0015043c 00151442 3479 1373862 81 00151b4a 00151b4a 00153815 0 1800 82 00153f1d 00153fb3 ffffffff 150 1800 83 00153f1d 0015643c ffffffff 9503 1392414 84 00153f1d 0015663c ffffffff 10015 1392414 85 00153f1d 001566e0 ffffffff 10179 1392414 86 00153f1d 0015943c ffffffff 21791 1392414 87 00153f1d 0015983c 0015a55f 22815 1392414 88 0015ac67 0015ad0e ffffffff 167 1800 89 0015ac67 0015ca3c 0015d1c0 7637 1420392 90 0015d8c8 0015d8f1 0015fa0c 41 1800 91 00160114 00160129 ffffffff 21 1800 92 00160114 0016043c ffffffff 808 1442069 93 00160114 00161c3c 001620bf 6952 1442069 94 001627c7 001627db ffffffff 20 1800 95 001627c7 0016447b ffffffff 7348 1451976 96 001627c7 0016463c 001661b3 7797 1451976 97 001668bb 0016694d 00167a66 146 1800 98 0016816e 0016853f 00169fa2 977 1800 99 0016a6aa 0016a6aa 0016c07d 0 1800 100 0016c785 0016cae1 0016f7b4 860 1800 101 0016febc 0016fee5 00172dee 41 1800 102 001734f6 0017350a 00175cdb 20 1800 103 001763e3 0017640c 00177487 41 1800 104 00177b8f 00177bd1 00179239 66 1800 105 00179941 00179955 0017aae1 20 1800 106 0017b1e9 0017b212 0017c680 41 1800 107 0017cd88 0017ce2f 0017fbce 167 1800 108 001802d6 001802ff 00182cfe 41 1800 109 00183406 0018342f 0018591f 41 1800 110 00186027 00186066 0018821d 63 1800 111 00188925 001889a2 0018b8a6 125 1800 112 0018bfae 0018c452 0018d833 1188 1800 113 0018df3b 0018df8e 00190d5f 83 1800
On Sun, Dec 7, 2014 at 6:50 PM, John Van Ostrand john@vanostrand.com wrote:
On Sun, Dec 7, 2014 at 3:14 PM, Jef Driesen jef@libdivecomputer.org wrote:
John,
I had another look at your latest code today. Armed with your documentation, I tried to understand the logic behind it. I have some comments and questions for you:
The split between the config and data is already a nice improvement. There is still room for improvement in the sense that the config should be all constant and not based on the input data (e.g. the sample_memory_end_address field).
There is a nasty bug in the cochran_commander_serial_setup() function. If there is an error, you close the serial port and free the device structure. But this function is called from several places, without checking the return value. You need to refactor this function to not free anything.
(You also don't need to pass the dc_context_t pointer as a parameter, because it's available from the device struct.)
The cochran_commander_serial_open() function is similar. Although it's not really a problem here, it's confusing that it frees the device struct. I would remove this function, and call serial_open() directly in the cochran_commander_device_open() function.
I seperated those when I thought I had to close and re-open the port to
move from high-baud back to low. At the time i was still simulating the Analyst software. I can recombine them.
The remainder of this email is mostly some questions and comments on the data format an how you handle that in the code:
With the exception of the id, config and misc packets, the protocol is mainly based on simple random access to the internal memory. There is no dedicated command to read the logbook and samples. They are simply stored in a certain area of the internal memory. So for the dump api it makes much more sense to implement as full memory dump, instead of reading just two "random" pieces of the memory. The result is that the dump would be much more generic, because you no longer need to interpret the data (in order to find the logbook and samples). This is important when you need the memory dump to diagnose bugs in the logic for downloading the dives. The fact that we don't seem to know the total amount of memory, might be a problem here.
What I find interesting is that logbook data is at address 0 (for the read command) and that the sample data read is far enough along to justify the config pages and maybe some program data, although I have no idea how large the program for this could be. GIven the background of Mike Cochran I suspect it's tight code.
So for the EMC 16 and 20H the logbook is from 0 to 0x80000 (1024 entries) then samples begine at 0x94000. I haven't looked to see what the 0x14000 bytes between the logbook and sample are, I suspect config and program data, although I'm surprised that doesn't start a 0.
Now for the EMC 20H the quoted maximum sample data is 1450 hours. 3 byte samples * 60 seconds * 60 minutes * 1450 hours is 0xEEF3E0, or close to 16MB, when added to the 0x94000 base. It's short but I suspect that 1450 is a rounded number.
To add to this, EMC models are sold with different amounts of memory configured. Word from the dealer is that it's configurable via software. That means each 20H is shipped with the same hardware configuration and that something in one of the other data blocks indicates how much memory is enabled. I've been unable to find where that might be and I'm worried it's stored as a feature code instead of simply the amount of memory.
The next thing where I have some concerns, is the logic how you find the logbook and samples. It seems we only know the start address of the logbook and sample ringbuffers (respectively 0 and 0x094000 for the emc), but not the end address. And that results in a few issues:
We don't really know the start. I just worked with my first EMC-14 and its samples start at 0x20000, mostly because it supports only 256 logs.
For the logbook, you assume the first logbook entry is at the address 0, and then move forward until you reach the total number of dives. But in practice, the number of dives that can be stored is not unlimited, and the memory is most likely used in a circular way. Thus, once you have enough dives, this assumption will no longer be correct. Ideally we need to figure out the end of the ringbuffer, so we can take this into account correctly. If we are unable to find this address, then we need at least some checks to verify our assumptions (for example check that the dive number starts at zero and always increases, check that we don't go past the sample area, etc). If you ask me, returning an error is always better than silently returning some wrong data.
The number of log book entries is also configured via software. In other words the DC can be configured to ignore part of the logbook ringbuffer. It might be a head/tail pointer or something.
You can see that the number of dives is available in the config data block. I suspect that a starting dive number is also present but since it's set at 00 00 now it can't be identified. Also because of the insane number of dives this computer will log it would take me 8 years to wrap mine.
Checking dive numbers will actually work for most divers. I suspect that all but a few will never wrap the ring buffer before changing DCs.
A similar problem exist when reading the samples. You have some code that tries to guess where the ringbuffer ends. But there is absolutely no guarantee that those assumptions are correct. I think it's much safer to not guess at all and just fail if the ringbuffer wrap point is crossed (e.g. begin address is higher than the end address). Once someone hits this error, we can investigate and determine the correct ringbuffer end address. If we're guessing, and get it wrong, then we're silently returning bogus data. That's not good. And the chance that your guess is wrong is pretty high. Although I agree that the total memory is likely nicely aligned, the sample ringbuffer can end anywhere. I've seen that more than once in other backends.
It's more likely that a ring buffer will wrap than the logbook. I have a Commander that has already wrapped (I bought it used) and so I have a working example. From USB traffic I could see the addresses used by Analyst and it's a nice clean alignment. I have no evidence on an EMC but it seems safe to assume it's a minor upgrade on the Commander software.
[One method that I have used successfully in the past to locate the ringbuffer end, without having access to data that crosses it, is by looking at the full memory dump. Usually unused data in flash memory contains all 0xFF. So if there is another block located right after the ringbuffer area, then the unused tail of the ringbuffer will contain 0xFF, followed by some non 0xFF data from that other block. Thus the first address containing non 0xFF bytes is very likely the end of the ringbuffer.]
There are three problems with that. First, for the EMC 20H, that's 16 MB of data and even at 800K baud it's going to be a while to download. Probing might make more sense. Reading 1K at certain points as a test.
Second, since the sample ring buffer size is software configurable the end may not be apparent. I expect the end of the buffer is truncated on a DC configured with less memory. That means that the technique of probing will probably return a larger size than it should.
Third, I suspect that the end of sample memory is the end of physical memory. It's a guess based on the number of quoted number of samples supported and the starting address of samples. I don't know what will happen if we read beyond physical memory.
The next big question is where does the sample data begins/ends? So far you have identified three pointers: pre, begin and end address. When I dump those pointers (see below), I noticed something interesting:
- The 5th column is the difference between begin and pre. According to
your documentation, these are inter dive events. They can be zero length too. Should we consider those as part of the dive, or ignore them? Since I don't know what's in those interdive events, I have no idea.
- The 6th column is the difference between pre and end of the previous
entry. This is nearly always equal to 1800. I doubt that's a coincidence. I guess that means there is some 1800 byte header or trailer. Do you have any idea what's in there?
- When the end pointer is ffffffff, the next entry has the same pre
pointer. Because it makes no sense that the sample data for two different dives start at the same address, I assume this means that for some reason (e.g. battery empty, too short duration, etc) this dive has no sample data. What you did in this case is take the data until the start of the next dive. But if I'm right, that's the inter dive events. So this makes no sense to me. Do you get anything useful from doing this?
Pre-dive events consist of the following (and more) according to documentation.
- Initialization of the unit.
- The unit is turned on
- Low batteries
- Altitude Changes of over 500 feet
- Temperature Changes of 10 degrees
- Sensor Malfunction
- Analyst ® P.C. Communication
Interdive events like the above are multiple bytes in length, up to 20 bytes. The DC has no on or off switch. It turns on under certain conditions and off after a certain amount of off-gassing after a dive. So many dives will have at least an On event occur before dives. The first dive has a lot of events because I was configuring the device via Analyst and each config change seems to be logged.
The DC also stays on after a dive and logs off-gassing. I'd bet that's the 1800 bytes of logs after. Although I assumed it was longer. Off gassing samples are recognizable by the depth change and ascent rate samples being 0x40 and 0x80 respectively (these decode to negative zero.)
A pointer of FFFFFFFF means the end of dive was never logged. I had a problem with my DC where it would reboot not logging the end of dive. The code tries to salvage the dive logs.
I also tried to parse the test data I received from you, but I can't get anything useful out of it. That might be due to the fact that I don't know at which address the sample data was read when you created that data (that might have changed in your latest code). Btw, that's exactly why I prefer a more generic dump format as explained above. Can you provide some new data for me, or the correct address? I also tried to run the parser under valgrind, and it reports a *huge* number of problems. That might be due to this mismatch in data, but the parser should be robust against whatever you throw at it. The rule of thumb is never trust the input data, and always check everything before you use it. I didn't investigate further, because I first want to confirm with good input :-)
As you can see from the source, the reads are based on data from the config blocks and from the logbook. Config tells me how many logs to read. Reading the logbook for pointers results in knowing which memory addresses to read.
For an EMC 20H the sample data starts at 0x94000. It's been a while since I checked that the code is actually reading from that point, but it does round down 16k.
I've not tried to read the data, like you have. My simulator used earlier files with names that included their memory addresses. From a glance the data looks good.
But let's go through an example. I'm using hexdump to view the files and doing this manually.
Take dive 45 from your list. The log entry can be found at 0x5A00 in the logbook.bin file and we can see the pre-pointer of 0xf255b, sample ptr of 0xf2725 and end ptr of 0xf2761. When I subtract 0x94000 from those I get 0x5e55b, 0x5e725 and 0x5e761. So I search for 5e550 in the the hexdump to get to the right place in the output. You can see 22 inter-dive events starting a 0x5e55b. The events are:
02 81 e3 7e 2a 0b 0a 10 04 08 0e fd 02 00 00 0e 00 09 52 00 10 80 15 7f 2a 1e 2b 13 04 08 0e da 02 00 00 0e 00 6f 5e 6c 54 10 00 7e 7f 2a 16 09 03 05 08 0e d9 02 00 00 0e 00 6e 54 6f 5e 01 40 0c 81 2a 22 1c 07 06 08 0e d8 02 00 00 0f 02 bb 4c 00 00 0f 02 01 80 0d 81 2a 36 21 07 06 08 0e d8 02 00 00 0c 04 b2 4c 0f 02 0c 04 01 40 1e 81 2a 16 2d 08 06 08 0e d8 02 00 00 13 06 dc 4b 0c 04 13 06 01 00 25 81 2a 0a 0e 09 06 08 0e d8 02 00 00 1b 04 62 4b 13 06 8b 06 01 c0 30 81 2a 12 04 0a 06 08 0e d8 02 00 00 1e 02 b1 4a 1b 04 8b 06 01 80 35 81 2a 22 18 0a 06 08 0e d8 02 00 00 00 00 7b 4a 1e 02 8b 06 10 80 39 81 2a 26 29 0a 06 08 0e d8 02 00 00 ff 00 6d 4a 6e 54 03 6b 7a 82 2a 35 1e 09 07 08 0e d8 02 00 00 ff 00 c8 48 02 f9 7d 82 2a 03 2e 09 07 08 0e fd 02 00 00 9b 00 14 48 00 02 12 bf 82 2a 30 17 0e 07 08 0e fd 02 00 00 00 00 a3 4a 00 03 25 bf 82 2a 07 18 0e 07 08 0e da 02 00 00 00 00 c3 4a 03 29 cf 82 2a 1b 20 0f 07 08 0e d8 02 00 00 00 00 ba 51 02 b1 d1 82 2a 0f 2b 0f 07 08 0e fd 02 00 00 38 00 ea 50 00 02 1d 09 83 2a 2b 27 13 07 08 0e ff 02 00 00 00 00 5b 4e 00 03 2d 09 83 2a 3b 27 13 07 08 0e e1 02 00 00 00 00 59 4e 03 02 0b 83 2a 30 2f 13 07 08 0e d8 02 00 00 46 00 a4 4e 10 00 6e 83 2a 0a 32 02 08 08 0e d6 02 00 00 71 00 67 40 6d 4a 10 e8 10 84 2a 0e 19 0e 08 08 0e fc 02 00 00 00 00 8a 52 67 40 02 e8 10 84 2a 0e 19 0e 08 08 0e fc 02 00 00 00 00 8a 52 00
You can see in the above columns 2 through 5 is a 4 byte time stamp (seconds since Jan 1, 1992). It's a consistent enough number that it helps to manually delineate pre-dive events.
The dive starts. Often (I suspect always when the EMC is in FO2 mode, i.e. open circuit) the dive starts with *dive events* e3 f3. A starting depth change sample of 0x40 is also a hint to the start of a dive. Hints like this are only used by the code that tried to read bad dives.
Fast forwarding to the end of the dive, I jump to 5e761 in the file and just before that you'll see dive events e1 c0, surface and switch-to-air events. Following that is off-gassing samples until the begin of the next dive's pre-dive events (if any). Maybe this dive was setting a dive flag for a training session. Maybe the off-gassing time of 10 minutes (1800 bytes) is based on the configured SIT interval after which a new dive can start.
I've been hesitant in reading the entire memory of the device. I may try with my used device, which is a Commander.
I've been holding off asking Cochran again for help. They previously refused citing concern that open source code would reveal too much about their algorithm. Now that the code exists I wonder if they would be willing to help by telling me how to determine model and memory configuration, or if they will demand that I stop.. It's also the same reason I've been trying to respect their privacy by not reading anything but my data.
Jef
i | pre | begin | end | 5th | 6th
0 00094000 00094834 00094a95 2100 606208 1 0009519d 0009519d 0009674b 0 1800 2 00096e53 00096e7d 000980c9 42 1800 3 000987d1 00098aa9 ffffffff 728 1800 4 000987d1 00098e3c 0009ac28 1643 624594 5 0009b330 0009b330 ffffffff 0 1800 6 0009b330 0009b43c 0009c2c4 268 635697 7 0009c9cc 0009ca46 0009e898 122 1800 8 0009efa0 0009f01d 000a07f4 125 1800 9 000a0efc 000a0f39 000a2d60 61 1800 10 000a3468 000a34a6 000a4f2a 62 1800 11 000a5632 000a5647 000a7207 21 1800 12 000a790f 000a7a3c ffffffff 301 1800 13 000a790f 000a8e3c ffffffff 5421 686352 14 000a790f 000a983c 000aa9e6 7981 686352 15 000ab0ee 000ab26c 000ab40a 382 1800 16 000abb12 000abb12 000abdc5 0 1800 17 000ac4cd 000ac4cd 000ad6c9 0 1800 18 000addd1 000ade4c 000ae5d3 123 1800 19 000aecdb 000aecdb 000b06cd 0 1800 20 000b0dd5 000b0e69 000b40aa 148 1800 21 000b47b2 000b481e ffffffff 108 1800 22 000b47b2 000b5466 000b70c3 3252 739251 23 000b77cb 000b77df 000b9d11 20 1800 24 000ba419 000ba469 000bda64 80 1800 25 000be16c 000be217 000c00ac 171 1800 26 000c07b4 000c084a 000c29ed 150 1800 27 000c30f5 000c34d6 000c4a9f 993 1800 28 000c51a7 000c51a7 000c6449 0 1800 29 000c6b51 000c6e74 000c6e7d 803 1800 30 000c7585 000c79ff 000c9d39 1146 1800 31 000ca441 000ca47f 000cc82d 62 1800 32 000ccf35 000ccf73 000ce838 62 1800 33 000cef40 000cefa7 000d15c7 103 1800 34 000d1ccf 000d1cf8 000d47ed 41 1800 35 000d4ef5 000d4f09 000d77c4 20 1800 36 000d7ecc 000d7ee0 000da532 20 1800 37 000dac3a 000dac4e 000dd172 20 1800 38 000dd87a 000dd88e 000df0bd 20 1800 39 000df7c5 000df7ee 000e2f19 41 1800 40 000e3621 000e364a 000e5730 41 1800 41 000e5e38 000e5e4c 000e880c 20 1800 42 000e8f14 000e8f28 000eb7cf 20 1800 43 000ebed7 000ebeeb 000eee22 20 1800 44 000ef52f 000ef543 000f1e4f 20 1805 45 000f255b 000f2725 000f2761 458 1804 46 000f2e69 000f2e69 000f599f 0 1800 47 000f60a7 000f60f9 000f8e7f 82 1800 48 000f9587 000f95b0 000fb7e2 41 1800 49 000fbeea 000fbfd8 000febe4 238 1800 50 000ff2ec 000ff397 00100e44 171 1800 51 0010154c 0010154c 0010160c 0 1800 52 00101d14 00101dd9 00103a7e 197 1800 53 00104186 001041f2 00105a4a 108 1800 54 00106152 00106167 00107b79 21 1800 55 00108281 001083d0 0010b8cc 335 1800 56 0010bfd4 0010c012 0010f0dc 62 1800 57 0010f7e4 0010f7f8 00111a89 20 1800 58 00112191 001121cf 00114e23 62 1800 59 0011552b 0011553f 00118412 20 1800 60 00118b1a 00118b58 0011b9a5 62 1800 61 0011c0ad 0011c0c1 0011e8ab 20 1800 62 0011efb3 0011efdc 00121e25 41 1800 63 0012252d 00122541 00124dc4 20 1800 64 001254cc 001254e0 00128055 20 1800 65 0012875d 00128771 0012b036 20 1800 66 0012b73e 0012b767 0012e4ba 41 1800 67 0012ebc2 0012ebd6 00130d22 20 1800 68 0013142a 0013143e 00133d2f 20 1800 69 00134437 00134437 00136ef7 0 1800 70 001375ff 00137628 0013912e 41 1800 71 00139836 0013984a 0013c3f2 20 1800 72 0013cafa 0013cdbc 00140ab6 706 1800 73 001411be 00141269 00143321 171 1800 74 00143a29 00143a29 001455fa 0 1800 75 00145d02 00145d42 00148ae2 64 1800 76 001491ea 0014933f 0014a805 341 1800 77 0014af0d 0014af0d 0014b1eb 0 1800 78 0014b8f3 0014b94a 0014ef9d 87 1800 79 0014f6a5 0014f70d ffffffff 104 1800 80 0014f6a5 0015043c 00151442 3479 1373862 81 00151b4a 00151b4a 00153815 0 1800 82 00153f1d 00153fb3 ffffffff 150 1800 83 00153f1d 0015643c ffffffff 9503 1392414 84 00153f1d 0015663c ffffffff 10015 1392414 85 00153f1d 001566e0 ffffffff 10179 1392414 86 00153f1d 0015943c ffffffff 21791 1392414 87 00153f1d 0015983c 0015a55f 22815 1392414 88 0015ac67 0015ad0e ffffffff 167 1800 89 0015ac67 0015ca3c 0015d1c0 7637 1420392 90 0015d8c8 0015d8f1 0015fa0c 41 1800 91 00160114 00160129 ffffffff 21 1800 92 00160114 0016043c ffffffff 808 1442069 93 00160114 00161c3c 001620bf 6952 1442069 94 001627c7 001627db ffffffff 20 1800 95 001627c7 0016447b ffffffff 7348 1451976 96 001627c7 0016463c 001661b3 7797 1451976 97 001668bb 0016694d 00167a66 146 1800 98 0016816e 0016853f 00169fa2 977 1800 99 0016a6aa 0016a6aa 0016c07d 0 1800 100 0016c785 0016cae1 0016f7b4 860 1800 101 0016febc 0016fee5 00172dee 41 1800 102 001734f6 0017350a 00175cdb 20 1800 103 001763e3 0017640c 00177487 41 1800 104 00177b8f 00177bd1 00179239 66 1800 105 00179941 00179955 0017aae1 20 1800 106 0017b1e9 0017b212 0017c680 41 1800 107 0017cd88 0017ce2f 0017fbce 167 1800 108 001802d6 001802ff 00182cfe 41 1800 109 00183406 0018342f 0018591f 41 1800 110 00186027 00186066 0018821d 63 1800 111 00188925 001889a2 0018b8a6 125 1800 112 0018bfae 0018c452 0018d833 1188 1800 113 0018df3b 0018df8e 00190d5f 83 1800
I did more investigating of setting commands and encoding. I've updated the trac wiki with the full information. It should be complete but it's not tested or strongly verified. There may be mistakes.
I did some testing today reading raw data to see if I could download a usable memory dump for the dump function and avoid all that work done in _dump and eliminate the need for cochran_download.c.
The read command (0x15 <start> <bytes> 0x05) only reads logbook and sample data. I suspected that the gap between logbook and sample contained config or other data but that isn't the case. There is no gap between Commander logbook and samples and the EMC has no data of significance there.
Also, when asked to read past the end of data on the Commander, It returns 0xFF for data that doesn't seem to be there. For example, the Commander I have has rolled the sample data buffer so all the memory has been written. Sample data stops at 0x100000 (1MB). When I asked for 8MB of data I got 7MB of 0xFF.
Unless i discover a new command to read data I won't be able to deliver a raw memory dump.
I'm also no closer to deciphering the feature data from the computer so I don't know how much memory is present and enabled.
On 19-12-14 21:23, John Van Ostrand wrote:
I did more investigating of setting commands and encoding. I've updated the trac wiki with the full information. It should be complete but it's not tested or strongly verified. There may be mistakes.
I did some testing today reading raw data to see if I could download a usable memory dump for the dump function and avoid all that work done in _dump and eliminate the need for cochran_download.c.
The read command (0x15 <start> <bytes> 0x05) only reads logbook and sample data. I suspected that the gap between logbook and sample contained config or other data but that isn't the case. There is no gap between Commander logbook and samples and the EMC has no data of significance there.
Also, when asked to read past the end of data on the Commander, It returns 0xFF for data that doesn't seem to be there. For example, the Commander I have has rolled the sample data buffer so all the memory has been written. Sample data stops at 0x100000 (1MB). When I asked for 8MB of data I got 7MB of 0xFF.
Unless i discover a new command to read data I won't be able to deliver a raw memory dump.
I'm also no closer to deciphering the feature data from the computer so I don't know how much memory is present and enabled.
I'm not sure I'm understand what you're saying here. First you say you can download 1MB of data. But then you say you can't download a memory dump? I don't know what you consider a memory dump, but I would say it's that 1MB of data! Combined with the id, misc and config blobs that has everything we need. I have no idea what else you want.
PS: Can you send me the data from your Commander too. If you have data from the other models, I would like to have a look at that too.
Jef
On 08-12-14 00:50, John Van Ostrand wrote:
On Sun, Dec 7, 2014 at 3:14 PM, Jef Driesen jef@libdivecomputer.org wrote:
With the exception of the id, config and misc packets, the protocol is mainly based on simple random access to the internal memory. There is no dedicated command to read the logbook and samples. They are simply stored in a certain area of the internal memory. So for the dump api it makes much more sense to implement as full memory dump, instead of reading just two "random" pieces of the memory. The result is that the dump would be much more generic, because you no longer need to interpret the data (in order to find the logbook and samples). This is important when you need the memory dump to diagnose bugs in the logic for downloading the dives. The fact that we don't seem to know the total amount of memory, might be a problem here.
What I meant here is that in the data that can be downloaded with the read command there are two main areas of interest: the logbook and sample ringbuffers. Right now, you read some part of each ringbuffer, and pack them into your memory dump structure. But because the start address and size depends on the current implementation, that's not very useful as a memory dump.
I'll illustrate with an example. Assume for a moment that the current code finds valid sample data in the area 0x094000 to 0x096000. Later on we discover a bug in the code, and the end address should have been 0x098000. That means all memory dump that have been downloaded with the old code are now useless because they don't contain enough data. If the start address is wrong, it gets even worse.
A memory dump should be independent from whatever logic we use for locating the dives. If you simply read everything from address 0 to the end, then you we can keep using those old memory dumps. If a device has lots of memory, then yes, it may take a while to download, but that's fine. Downloading a memory dump is not the normal use-case. It's intended for troubleshooting, and then you do want all the data. (For reference, downloading a full memory on a Suunto Vyper takes 10-15 minutes, and that's only 8KB of data!)
If we really don't know the total amount of memory, then we'll have to make an assumption. If that assumptions turns out to be wrong, we can always adjust it. But at least we always have all data up to a certain address.
Since the other pieces of data (id, misc and config) have a fixed size, the structure for a good memory dump could be as simple as concatenating all pieces:
id 67 bytes misc 1500 bytes config 2x512 (emc) or 4x512 (commander) bytes memory variable length, memory starting at address
An alternative is to only include the memory part, and take care of the other three separately, by means of the vendor event. This is how it's done in the several other backends (e.g. oceanic and reefnet).
What I find interesting is that logbook data is at address 0 (for the read command) and that the sample data read is far enough along to justify the config pages and maybe some program data, although I have no idea how large the program for this could be. GIven the background of Mike Cochran I suspect it's tight code.
So for the EMC 16 and 20H the logbook is from 0 to 0x80000 (1024 entries) then samples begine at 0x94000. I haven't looked to see what the 0x14000 bytes between the logbook and sample are, I suspect config and program data, although I'm surprised that doesn't start a 0.
Now for the EMC 20H the quoted maximum sample data is 1450 hours. 3 byte samples * 60 seconds * 60 minutes * 1450 hours is 0xEEF3E0, or close to 16MB, when added to the 0x94000 base. It's short but I suspect that 1450 is a rounded number.
To add to this, EMC models are sold with different amounts of memory configured. Word from the dealer is that it's configurable via software. That means each 20H is shipped with the same hardware configuration and that something in one of the other data blocks indicates how much memory is enabled. I've been unable to find where that might be and I'm worried it's stored as a feature code instead of simply the amount of memory.
My advice is to start with a very simple assumption. If your assumption is wrong, then someday someone will be affected, and report it. At that point you can fix your assumption.
Let's look at logbook area. It seems the ringbuffer is located at address 0 to 0x80000 (1024 entries of 512 bytes) for the emc and address 0 to 0x20000 (512 entries of 256 bytes) for the commander . We also have the number of dives from the config block. If it's less than 1024/512, then we know the ringbuffer isn't full yet. A reasonable assumption is that it will be at offset 0. If there are more, then all entries will be valid and we can find the oldest one by means of the internal dive number. I think those are very reasonable assumptions. In fact that's pretty much the description of the OSTC3 algorithm.
Now, if a device is limited by its firmware to less than 1024/512 logbook entries, then the above will work fine until someone exceeds this artificial limit. At that point our algorithm will fail, and we'll get a bugreport. But the good news is that we now also have real data to confirm and fix the problem.
For the sample area, we know the start address of the ringbuffer, but not the end. What we could do is assume the ringbuffer is unlimited in size (e.g. address 0xFFFFFFFF, which is the maximum we can support with a 32bit integer) and fail when the end address is before the start address. Normally that indicates that we reached the end of the ringbuffer. Again, we now have access to data where we can figure out the exact ringbuffer end.
The idea is that you always start with a very simple assumptions, and improve them if they turn out to be wrong. That's in a nutshell how things are usually done in libdivecomputer. When you make an (educated) guess, without any hard evidence, then you have a very high chance that your guess is still wrong. But the problem is that the resulting bugs will be a lot more subtle and harder to spot and fix.
For example if a ringbuffer end address is wrong, then you'll either miss a few bytes (address too small) or have some garbage bytes inserted (address too large). A few missing samples can easily go unnoticed.
The next thing where I have some concerns, is the logic how you find the logbook and samples. It seems we only know the start address of the logbook and sample ringbuffers (respectively 0 and 0x094000 for the emc), but not the end address. And that results in a few issues:
We don't really know the start. I just worked with my first EMC-14 and its samples start at 0x20000, mostly because it supports only 256 logs.
If the first dive starts at that address, then it's likely the start address of the ringbuffer. Of course it's not a guarantee, but nevertheless a good assumption. Let's say it's correct until proven wrong :-)
For the logbook, you assume the first logbook entry is at the address 0, and then move forward until you reach the total number of dives. But in practice, the number of dives that can be stored is not unlimited, and the memory is most likely used in a circular way. Thus, once you have enough dives, this assumption will no longer be correct. Ideally we need to figure out the end of the ringbuffer, so we can take this into account correctly. If we are unable to find this address, then we need at least some checks to verify our assumptions (for example check that the dive number starts at zero and always increases, check that we don't go past the sample area, etc). If you ask me, returning an error is always better than silently returning some wrong data.
The number of log book entries is also configured via software. In other words the DC can be configured to ignore part of the logbook ringbuffer. It might be a head/tail pointer or something.
You can see that the number of dives is available in the config data block. I suspect that a starting dive number is also present but since it's set at 00 00 now it can't be identified. Also because of the insane number of dives this computer will log it would take me 8 years to wrap mine.
Checking dive numbers will actually work for most divers. I suspect that all but a few will never wrap the ring buffer before changing DCs.
See my explanation above. Have a look at the hw_ostc3_device_foreach function.
I believe I have found something else that will be of interest here.
At offset 0x142 in config0 there is an "end-of-profile" pointer. In the data I got from you it contains 0x00191467. That nicely corresponds to the end pointer (0x00190d5f) of the last logbook entry (#114), plus the 1800 byte "trailer". This is "end-of-profile" pointer exist in most data formats and is typically used internally by the dive computer as the position to start writing the sample data for the next dive.
The next 4 bytes contains the value 0xE400. This is the address where the next logbook entry will be written (114 * 512). So this is probably the "end-of-logbook" pointer.
With this information, the ringbuffer starts to look very similar to what I've seen in other data formats.
A similar problem exist when reading the samples. You have some code that tries to guess where the ringbuffer ends. But there is absolutely no guarantee that those assumptions are correct. I think it's much safer to not guess at all and just fail if the ringbuffer wrap point is crossed (e.g. begin address is higher than the end address). Once someone hits this error, we can investigate and determine the correct ringbuffer end address. If we're guessing, and get it wrong, then we're silently returning bogus data. That's not good. And the chance that your guess is wrong is pretty high. Although I agree that the total memory is likely nicely aligned, the sample ringbuffer can end anywhere. I've seen that more than once in other backends.
It's more likely that a ring buffer will wrap than the logbook. I have a Commander that has already wrapped (I bought it used) and so I have a working example. From USB traffic I could see the addresses used by Analyst and it's a nice clean alignment. I have no evidence on an EMC but it seems safe to assume it's a minor upgrade on the Commander software.
So looks like you already have some real data to figure out the ringbuffer end for the commander. Great.
Note that if the requested addresses are nicely aligned, then there is no guarantee that the ringbuffer end address is also nicely aligned. The application might read more data than it needs, there might be data that is not part of the ringbuffer but still used for something else, etc.
[One method that I have used successfully in the past to locate the ringbuffer end, without having access to data that crosses it, is by looking at the full memory dump. Usually unused data in flash memory contains all 0xFF. So if there is another block located right after the ringbuffer area, then the unused tail of the ringbuffer will contain 0xFF, followed by some non 0xFF data from that other block. Thus the first address containing non 0xFF bytes is very likely the end of the ringbuffer.]
There are three problems with that. First, for the EMC 20H, that's 16 MB of data and even at 800K baud it's going to be a while to download. Probing might make more sense. Reading 1K at certain points as a test.
A long download might be annoying, but it's not a big deal either. It's not something you'll need to do every day.
Second, since the sample ring buffer size is software configurable the end may not be apparent. I expect the end of the buffer is truncated on a DC configured with less memory. That means that the technique of probing will probably return a larger size than it should.
That might be true, but we can't write code to handle things we don't know yet. Only for the things we do know.
Third, I suspect that the end of sample memory is the end of physical memory. It's a guess based on the number of quoted number of samples supported and the starting address of samples. I don't know what will happen if we read beyond physical memory.
Without actually trying, I have no idea what will happen when you read past the available memory. The read request may fail, the device could automatically wrap around to some other address (and not necessary to zero), etc. All possible outcomes that I've seen in the past.
The next big question is where does the sample data begins/ends? So far you have identified three pointers: pre, begin and end address. When I dump those pointers (see below), I noticed something interesting:
- The 5th column is the difference between begin and pre. According
to your documentation, these are inter dive events. They can be zero length too. Should we consider those as part of the dive, or ignore them? Since I don't know what's in those interdive events, I have no idea.
- The 6th column is the difference between pre and end of the
previous entry. This is nearly always equal to 1800. I doubt that's a coincidence. I guess that means there is some 1800 byte header or trailer. Do you have any idea what's in there?
- When the end pointer is ffffffff, the next entry has the same pre
pointer. Because it makes no sense that the sample data for two different dives start at the same address, I assume this means that for some reason (e.g. battery empty, too short duration, etc) this dive has no sample data. What you did in this case is take the data until the start of the next dive. But if I'm right, that's the inter dive events. So this makes no sense to me. Do you get anything useful from doing this?
Pre-dive events consist of the following (and more) according to documentation.
- Initialization of the unit.
- The unit is turned on
- Low batteries
- Altitude Changes of over 500 feet
- Temperature Changes of 10 degrees
- Sensor Malfunction
- Analyst ® P.C. Communication
Interdive events like the above are multiple bytes in length, up to 20 bytes. The DC has no on or off switch. It turns on under certain conditions and off after a certain amount of off-gassing after a dive. So many dives will have at least an On event occur before dives. The first dive has a lot of events because I was configuring the device via Analyst and each config change seems to be logged.
The DC also stays on after a dive and logs off-gassing. I'd bet that's the 1800 bytes of logs after. Although I assumed it was longer. Off gassing samples are recognizable by the depth change and ascent rate samples being 0x40 and 0x80 respectively (these decode to negative zero.)
Do you think we should consider the pre-dive event as part of the dive data? Based on your description, these pre-dive events do not look really interesting from a dive logbook point of view. I have no idea how the parser should deal with it, other than just ignoring them.
A pointer of FFFFFFFF means the end of dive was never logged. I had a problem with my DC where it would reboot not logging the end of dive. The code tries to salvage the dive logs.
Can you recover some of the samples? Based on what I've seen so far (see my long table) it makes no sense to try to recover the samples, because it looks like at least part of the data you are recovering are the pre-dive events of another dive. That certainly doesn't look right.
Look for example at dive #79:
78 0014b8f3 0014b94a 0014ef9d 87 1800 79 0014f6a5 0014f70d ffffffff 104 1800 80 0014f6a5 0015043c 00151442 3479 1373862
Your code to guess the end pointer will replace ffffffff with 0015043c, which is the start of the next dive (#80). So for dive #79 you take the range 0014f70d to 0015043c as the sample data. But that memory range is also the last part of the pre-dive events for dive #80. This overlap doesn't make any sense to me.
I also tried to parse the test data I received from you, but I can't get anything useful out of it. That might be due to the fact that I don't know at which address the sample data was read when you created that data (that might have changed in your latest code). Btw, that's exactly why I prefer a more generic dump format as explained above. Can you provide some new data for me, or the correct address? I also tried to run the parser under valgrind, and it reports a *huge* number of problems. That might be due to this mismatch in data, but the parser should be robust against whatever you throw at it. The rule of thumb is never trust the input data, and always check everything before you use it. I didn't investigate further, because I first want to confirm with good input :-)
As you can see from the source, the reads are based on data from the config blocks and from the logbook. Config tells me how many logs to read. Reading the logbook for pointers results in knowing which memory addresses to read.
For an EMC 20H the sample data starts at 0x94000. It's been a while since I checked that the code is actually reading from that point, but it does round down 16k.
I've not tried to read the data, like you have. My simulator used earlier files with names that included their memory addresses. From a glance the data looks good.
But let's go through an example.
[...]
Ignore my question. I think it was a mistake on my side. Instead of modify the code to read the data from files, I extended the libdivecomputer simulator with the cochran protocol. It produces reasonable looking profiles now, so I assume there was some bug in previous hack to load the data from files.
I've been hesitant in reading the entire memory of the device. I may try with my used device, which is a Commander.
I've been holding off asking Cochran again for help. They previously refused citing concern that open source code would reveal too much about their algorithm. Now that the code exists I wonder if they would be willing to help by telling me how to determine model and memory configuration, or if they will demand that I stop.. It's also the same reason I've been trying to respect their privacy by not reading anything but my data.
That's their usual argument. The truth is that I've never encountered a device where you can download anything worth protecting (e.g. firmware code, decompression algorithm). All you can access is the memory containing the dives.
Jef
On Mon, Dec 22, 2014 at 10:38 AM, Jef Driesen jef@libdivecomputer.org wrote:
On 08-12-14 00:50, John Van Ostrand wrote:
On Sun, Dec 7, 2014 at 3:14 PM, Jef Driesen jef@libdivecomputer.org wrote:
With the exception of the id, config and misc packets, the protocol is mainly based on simple random access to the internal memory. There is no dedicated command to read the logbook and samples. They are simply stored in a certain area of the internal memory. So for the dump api it makes much more sense to implement as full memory dump, instead of reading just two "random" pieces of the memory. The result is that the dump would be much more generic, because you no longer need to interpret the data (in order to find the logbook and samples). This is important when you need the memory dump to diagnose bugs in the logic for downloading the dives. The fact that we don't seem to know the total amount of memory, might be a problem here.
What I meant here is that in the data that can be downloaded with the read command there are two main areas of interest: the logbook and sample ringbuffers. Right now, you read some part of each ringbuffer, and pack them into your memory dump structure. But because the start address and size depends on the current implementation, that's not very useful as a memory dump.
I'll illustrate with an example. Assume for a moment that the current code finds valid sample data in the area 0x094000 to 0x096000. Later on we discover a bug in the code, and the end address should have been 0x098000. That means all memory dump that have been downloaded with the old code are now useless because they don't contain enough data. If the start address is wrong, it gets even worse.
That makes sense.
A memory dump should be independent from whatever logic we use for locating the dives. If you simply read everything from address 0 to the end, then you we can keep using those old memory dumps. If a device has lots of memory, then yes, it may take a while to download, but that's fine. Downloading a memory dump is not the normal use-case. It's intended for troubleshooting, and then you do want all the data. (For reference, downloading a full memory on a Suunto Vyper takes 10-15 minutes, and that's only 8KB of data!)
If we really don't know the total amount of memory, then we'll have to make an assumption. If that assumptions turns out to be wrong, we can always adjust it. But at least we always have all data up to a certain address.
I'll have to commit to knowing the end of memory then. Right now I've avoided stating the end explicitly because I'm not sure where it is. If all Cochran DCs return 0xFF when past memory I can be a little liberal and not crash.
Since the other pieces of data (id, misc and config) have a fixed size, the structure for a good memory dump could be as simple as concatenating all pieces:
id 67 bytes misc 1500 bytes config 2x512 (emc) or 4x512 (commander) bytes memory variable length, memory starting at address
An alternative is to only include the memory part, and take care of the other three separately, by means of the vendor event. This is how it's done in the several other backends (e.g. oceanic and reefnet).
Considering I'm one of two users of this I'd like to have the extra data. And considering the person doing the dump knows the model it should be apparent how to parse the data so I don't need to make a generic format.
What I find interesting is that logbook data is at address 0 (for the read
command) and that the sample data read is far enough along to justify the config pages and maybe some program data, although I have no idea how large the program for this could be. GIven the background of Mike Cochran I suspect it's tight code.
So for the EMC 16 and 20H the logbook is from 0 to 0x80000 (1024 entries) then samples begine at 0x94000. I haven't looked to see what the 0x14000 bytes between the logbook and sample are, I suspect config and program data, although I'm surprised that doesn't start a 0.
Now for the EMC 20H the quoted maximum sample data is 1450 hours. 3 byte samples * 60 seconds * 60 minutes * 1450 hours is 0xEEF3E0, or close to 16MB, when added to the 0x94000 base. It's short but I suspect that 1450 is a rounded number.
To add to this, EMC models are sold with different amounts of memory configured. Word from the dealer is that it's configurable via software. That means each 20H is shipped with the same hardware configuration and that something in one of the other data blocks indicates how much memory is enabled. I've been unable to find where that might be and I'm worried it's stored as a feature code instead of simply the amount of memory.
My advice is to start with a very simple assumption. If your assumption is wrong, then someday someone will be affected, and report it. At that point you can fix your assumption.
Let's look at logbook area. It seems the ringbuffer is located at address 0 to 0x80000 (1024 entries of 512 bytes) for the emc and address 0 to 0x20000 (512 entries of 256 bytes) for the commander . We also have the number of dives from the config block. If it's less than 1024/512, then we know the ringbuffer isn't full yet. A reasonable assumption is that it will be at offset 0. If there are more, then all entries will be valid and we can find the oldest one by means of the internal dive number. I think those are very reasonable assumptions. In fact that's pretty much the description of the OSTC3 algorithm.
Now, if a device is limited by its firmware to less than 1024/512 logbook entries, then the above will work fine until someone exceeds this artificial limit. At that point our algorithm will fail, and we'll get a bugreport. But the good news is that we now also have real data to confirm and fix the problem.
For the sample area, we know the start address of the ringbuffer, but not the end. What we could do is assume the ringbuffer is unlimited in size (e.g. address 0xFFFFFFFF, which is the maximum we can support with a 32bit integer) and fail when the end address is before the start address. Normally that indicates that we reached the end of the ringbuffer. Again, we now have access to data where we can figure out the exact ringbuffer end.
The idea is that you always start with a very simple assumptions, and improve them if they turn out to be wrong. That's in a nutshell how things are usually done in libdivecomputer. When you make an (educated) guess, without any hard evidence, then you have a very high chance that your guess is still wrong. But the problem is that the resulting bugs will be a lot more subtle and harder to spot and fix.
For example if a ringbuffer end address is wrong, then you'll either miss a few bytes (address too small) or have some garbage bytes inserted (address too large). A few missing samples can easily go unnoticed.
The next thing where I have some concerns, is the logic how you find the
logbook and samples. It seems we only know the start address of the logbook and sample ringbuffers (respectively 0 and 0x094000 for the emc), but not the end address. And that results in a few issues:
We don't really know the start. I just worked with my first EMC-14 and its samples start at 0x20000, mostly because it supports only 256 logs.
If the first dive starts at that address, then it's likely the start address of the ringbuffer. Of course it's not a guarantee, but nevertheless a good assumption. Let's say it's correct until proven wrong :-)
For the logbook, you assume the first logbook entry is at the address 0,
and then move forward until you reach the total number of dives. But in practice, the number of dives that can be stored is not unlimited, and the memory is most likely used in a circular way. Thus, once you have enough dives, this assumption will no longer be correct. Ideally we need to figure out the end of the ringbuffer, so we can take this into account correctly. If we are unable to find this address, then we need at least some checks to verify our assumptions (for example check that the dive number starts at zero and always increases, check that we don't go past the sample area, etc). If you ask me, returning an error is always better than silently returning some wrong data.
The number of log book entries is also configured via software. In other words the DC can be configured to ignore part of the logbook ringbuffer. It might be a head/tail pointer or something.
You can see that the number of dives is available in the config data block. I suspect that a starting dive number is also present but since it's set at 00 00 now it can't be identified. Also because of the insane number of dives this computer will log it would take me 8 years to wrap mine.
Checking dive numbers will actually work for most divers. I suspect that all but a few will never wrap the ring buffer before changing DCs.
See my explanation above. Have a look at the hw_ostc3_device_foreach function.
I believe I have found something else that will be of interest here.
At offset 0x142 in config0 there is an "end-of-profile" pointer. In the data I got from you it contains 0x00191467. That nicely corresponds to the end pointer (0x00190d5f) of the last logbook entry (#114), plus the 1800 byte "trailer". This is "end-of-profile" pointer exist in most data formats and is typically used internally by the dive computer as the position to start writing the sample data for the next dive.
The next 4 bytes contains the value 0xE400. This is the address where the next logbook entry will be written (114 * 512). So this is probably the "end-of-logbook" pointer.
With this information, the ringbuffer starts to look very similar to what I've seen in other data formats.
Good catch. It looks like I'm wrong about the start address of the EMC14 samples, it looks like 0x22000 based on the empty DC. Now to find that on the Commander.
A similar problem exist when reading the samples. You have some code that
tries to guess where the ringbuffer ends. But there is absolutely no guarantee that those assumptions are correct. I think it's much safer to not guess at all and just fail if the ringbuffer wrap point is crossed (e.g. begin address is higher than the end address). Once someone hits this error, we can investigate and determine the correct ringbuffer end address. If we're guessing, and get it wrong, then we're silently returning bogus data. That's not good. And the chance that your guess is wrong is pretty high. Although I agree that the total memory is likely nicely aligned, the sample ringbuffer can end anywhere. I've seen that more than once in other backends.
It's more likely that a ring buffer will wrap than the logbook. I have a Commander that has already wrapped (I bought it used) and so I have a working example. From USB traffic I could see the addresses used by Analyst and it's a nice clean alignment. I have no evidence on an EMC but it seems safe to assume it's a minor upgrade on the Commander software.
So looks like you already have some real data to figure out the ringbuffer end for the commander. Great.
Note that if the requested addresses are nicely aligned, then there is no guarantee that the ringbuffer end address is also nicely aligned. The application might read more data than it needs, there might be data that is not part of the ringbuffer but still used for something else, etc.
I haven't seen evidence of that on the Commander. The data looks like it runs to the end.
[One method that I have used successfully in the past to locate the
ringbuffer end, without having access to data that crosses it, is by looking at the full memory dump. Usually unused data in flash memory contains all 0xFF. So if there is another block located right after the ringbuffer area, then the unused tail of the ringbuffer will contain 0xFF, followed by some non 0xFF data from that other block. Thus the first address containing non 0xFF bytes is very likely the end of the ringbuffer.]
There are three problems with that. First, for the EMC 20H, that's 16 MB of data and even at 800K baud it's going to be a while to download. Probing might make more sense. Reading 1K at certain points as a test.
A long download might be annoying, but it's not a big deal either. It's not something you'll need to do every day.
Second, since the sample ring buffer size is software configurable the end
may not be apparent. I expect the end of the buffer is truncated on a DC configured with less memory. That means that the technique of probing will probably return a larger size than it should.
That might be true, but we can't write code to handle things we don't know yet. Only for the things we do know.
Third, I suspect that the end of sample memory is the end of physical
memory. It's a guess based on the number of quoted number of samples supported and the starting address of samples. I don't know what will happen if we read beyond physical memory.
Without actually trying, I have no idea what will happen when you read past the available memory. The read request may fail, the device could automatically wrap around to some other address (and not necessary to zero), etc. All possible outcomes that I've seen in the past.
The next big question is where does the sample data begins/ends? So far
you have identified three pointers: pre, begin and end address. When I dump those pointers (see below), I noticed something interesting:
- The 5th column is the difference between begin and pre. According to
your documentation, these are inter dive events. They can be zero length too. Should we consider those as part of the dive, or ignore them? Since I don't know what's in those interdive events, I have no idea.
- The 6th column is the difference between pre and end of the previous
entry. This is nearly always equal to 1800. I doubt that's a coincidence. I guess that means there is some 1800 byte header or trailer. Do you have any idea what's in there?
- When the end pointer is ffffffff, the next entry has the same pre
pointer. Because it makes no sense that the sample data for two different dives start at the same address, I assume this means that for some reason (e.g. battery empty, too short duration, etc) this dive has no sample data. What you did in this case is take the data until the start of the next dive. But if I'm right, that's the inter dive events. So this makes no sense to me. Do you get anything useful from doing this?
Pre-dive events consist of the following (and more) according to
documentation.
- Initialization of the unit.
- The unit is turned on
- Low batteries
- Altitude Changes of over 500 feet
- Temperature Changes of 10 degrees
- Sensor Malfunction
- Analyst ® P.C. Communication
Interdive events like the above are multiple bytes in length, up to 20 bytes. The DC has no on or off switch. It turns on under certain conditions and off after a certain amount of off-gassing after a dive. So many dives will have at least an On event occur before dives. The first dive has a lot of events because I was configuring the device via Analyst and each config change seems to be logged.
The DC also stays on after a dive and logs off-gassing. I'd bet that's the 1800 bytes of logs after. Although I assumed it was longer. Off gassing samples are recognizable by the depth change and ascent rate samples being 0x40 and 0x80 respectively (these decode to negative zero.)
Do you think we should consider the pre-dive event as part of the dive data? Based on your description, these pre-dive events do not look really interesting from a dive logbook point of view. I have no idea how the parser should deal with it, other than just ignoring them.
If the data were readily available I'd be interested in some of the data, like altitude and temp changes while travelling. However I've looked at some of the raw pre-dive data and I can't parse the event data. All that said I think this data is a passing interest.
I took my cue from Analyst. It doesn't show the pre-dive events on dive profiles nor does it show the off-gassing data. It has an option to display "inter-dive" events and does so similar to a dive. I've not been interested in the data, except to find out what it was. I've been thinking of libdivecomputer in the context of Subsurface which has no mechanism to show inter-dive events or data. I think libdivecomputer would need some other mechanism to present this data and if only the Cochran does this it may not make sense to spend the time.
A pointer of FFFFFFFF means the end of dive was never logged. I had a
problem with my DC where it would reboot not logging the end of dive. The code tries to salvage the dive logs.
Can you recover some of the samples? Based on what I've seen so far (see my long table) it makes no sense to try to recover the samples, because it looks like at least part of the data you are recovering are the pre-dive events of another dive. That certainly doesn't look right.
Look for example at dive #79:
78 0014b8f3 0014b94a 0014ef9d 87 1800 79 0014f6a5 0014f70d ffffffff 104 1800 80 0014f6a5 0015043c 00151442 3479 1373862
Your code to guess the end pointer will replace ffffffff with 0015043c, which is the start of the next dive (#80). So for dive #79 you take the range 0014f70d to 0015043c as the sample data. But that memory range is also the last part of the pre-dive events for dive #80. This overlap doesn't make any sense to me.
You can see several of those starting about there. The battery was low and the DC was starting to exhibit the crash problem so I was intentionally invoking it about that time so I could be confident in describing the problem.
Take a look at
95 001627c7 0016447b ffffffff 7348 1451976 96 001627c7 0016463c 001661b3 7797 1451976
That 7348 byte of samples on dive 95 was a 40 minute dive in Tobermory, Canada followed by another about that length. Both imported mostly correctly. I'd rather have that flawed dive profile than nothing.
I'm not sure what dive 79 was, but I figured the user would rather see corrupt dive imported than none so s/he could make a choice.
I also tried to parse the test data I received from you, but I can't get
anything useful out of it. That might be due to the fact that I don't know at which address the sample data was read when you created that data (that might have changed in your latest code). Btw, that's exactly why I prefer a more generic dump format as explained above. Can you provide some new data for me, or the correct address? I also tried to run the parser under valgrind, and it reports a *huge* number of problems. That might be due to this mismatch in data, but the parser should be robust against whatever you throw at it. The rule of thumb is never trust the input data, and always check everything before you use it. I didn't investigate further, because I first want to confirm with good input :-)
As you can see from the source, the reads are based on data from the config blocks and from the logbook. Config tells me how many logs to read. Reading the logbook for pointers results in knowing which memory addresses to read.
For an EMC 20H the sample data starts at 0x94000. It's been a while since I checked that the code is actually reading from that point, but it does round down 16k.
I've not tried to read the data, like you have. My simulator used earlier files with names that included their memory addresses. From a glance the data looks good.
But let's go through an example.
[...]
Ignore my question. I think it was a mistake on my side. Instead of modify the code to read the data from files, I extended the libdivecomputer simulator with the cochran protocol. It produces reasonable looking profiles now, so I assume there was some bug in previous hack to load the data from files.
I've been hesitant in reading the entire memory of the device. I may try
with my used device, which is a Commander.
I've been holding off asking Cochran again for help. They previously refused citing concern that open source code would reveal too much about their algorithm. Now that the code exists I wonder if they would be willing to help by telling me how to determine model and memory configuration, or if they will demand that I stop.. It's also the same reason I've been trying to respect their privacy by not reading anything but my data.
That's their usual argument. The truth is that I've never encountered a device where you can download anything worth protecting (e.g. firmware code, decompression algorithm). All you can access is the memory containing the dives.
Jef
Have you had any success in changing their minds? Do you have any good examples of DC manufacturers that have been open that I could use as examples when talking to Cochran next?
John,
I finally had some time again for another look at the cochran backend. This time I concentrated mainly on the data from your Commander. The main issue there is that the profile ringbuffer is completely full. The oldest logbook entries are pointing to profile data that has already been overwritten with newer dives. Because this is not handled properly, those older dives end up with completely bogus profile data.
There are 347 logbook entries, and the end-of-profile (eop) pointer contains the value 0x0009a20d. If I dump the pre, begin and end pointers again, I get this:
346 00098290 00098478 00099d5a 488 1203 345 00097d5f 00097d5f 00097ddf 0 1201 344 0009661a 000966d3 000978ae 185 1201 ... 225 0009b16e 0009b16e 0009cbce 0 1201 224 00099e22 00099e6f 0009acbd 77 1201 <- Contains the eop address. 223 00096e85 00098109 00099971 4740 1201 ... 78 0009b79a 0009c2a1 0009d247 2823 1205 77 00099eb5 00099edf 0009b2e9 42 1201 <- Yet another time here. 76 00098588 000985c4 00099a04 60 1201 ... 1 00021928 00021a00 00023420 216 1201 0 00020000 000201fd 00021477 509 1201
As you can see dive #224 contains the eop address, which means part of the profile belongs to dive #346. Thus all older dives no longer have valid profile data.
The easiest solution to fix this is to process dives from the oldest to the newest one (which you already do), and sum the profile length. Once the total length exceeds the size of the ringbuffer (e.g. the maximum amount of profile data), all older dives will no longer have profile data. For those dives you only have the logbook entry.
I have attached a patch with a proof of concept implementation. It's far from complete, but I think you'll get the idea. The patch also illustrate how you can nicely concentrate all the main logic for downloading the dives into the foreach function, instead of scattered over many functions. The main idea is that you try to separate the download and parsing logic as much as possible.
(More comments inline.)
On 2014-12-22 21:58, John Van Ostrand wrote:
On Mon, Dec 22, 2014 at 10:38 AM, Jef Driesen jef@libdivecomputer.org
A memory dump should be independent from whatever logic we use for locating the dives. If you simply read everything from address 0 to the end, then you we can keep using those old memory dumps. If a device has lots of memory, then yes, it may take a while to download, but that's fine. Downloading a memory dump is not the normal use-case. It's intended for troubleshooting, and then you do want all the data. (For reference, downloading a full memory on a Suunto Vyper takes 10-15 minutes, and that's only 8KB of data!)
If we really don't know the total amount of memory, then we'll have to make an assumption. If that assumptions turns out to be wrong, we can always adjust it. But at least we always have all data up to a certain address.
I'll have to commit to knowing the end of memory then. Right now I've avoided stating the end explicitly because I'm not sure where it is. If all Cochran DCs return 0xFF when past memory I can be a little liberal and not crash.
Rather than assuming a value that might be too large, I would start with a relative small value and increase whenever there is evidence that there is indeed more memory. Since the last memory area contains the profile ringbuffer, this evidence will most likely be a ringbuffer pointer that points past our assumed end. That's something that's very easy to check.
If we start with a value that is too large, and there is a device where the profile data crosses the ringbuffer end, then we will bogus profile data. I'll illustrate with an example. Assume for a momement that the real profile ringbuffer is in the range 0x200-0x400, and we (incorrectly) assume the end is at 0x800. If there happens to be a dive that crosses the ringbuffer end, for example from 0x380 to 0x280, then we'll read a first part from 0x380 to 0x800 and a second part from 0x200 to 0x280.
200 280 380 400 800 |xxxxx xxxxx| | <- Correct |yyyyy yyyyy|yyyyyyyyyyyyyy| <- Wrong
As you can see that first part contains an extra 0x400 bytes that shouldn't be there. We can't easily detect this kind of errors, other than the user noticing the resulting profile is completely bogus.
If we had estimated the amount of memory too small, for example at 0x300, then we would be able to tell immediately that the 0x380 pointer is outside the allowed range. Then we know we have to adjust the range.
Since the other pieces of data (id, misc and config) have a fixed size, the structure for a good memory dump could be as simple as concatenating all pieces:
id 67 bytes misc 1500 bytes config 2x512 (emc) or 4x512 (commander) bytes memory variable length, memory starting at address
An alternative is to only include the memory part, and take care of the other three separately, by means of the vendor event. This is how it's done in the several other backends (e.g. oceanic and reefnet).
Considering I'm one of two users of this I'd like to have the extra data. And considering the person doing the dump knows the model it should be apparent how to parse the data so I don't need to make a generic format.
I'm not sure I understand your concerns here. Of course we want that extra data. To locate the dives, we need at a minimum the id and config0 packets. And depending on what we discover, we might need the others too at some point. I did only mention that instead of pre-pending the id, misc and config blocks to the memory dump, we could deliver them separately by means of the vendor event. That's how it's done in those other backends.
Do you think we should consider the pre-dive event as part of the dive data? Based on your description, these pre-dive events do not look really interesting from a dive logbook point of view. I have no idea how the parser should deal with it, other than just ignoring them.
If the data were readily available I'd be interested in some of the data, like altitude and temp changes while travelling. However I've looked at some of the raw pre-dive data and I can't parse the event data. All that said I think this data is a passing interest.
I took my cue from Analyst. It doesn't show the pre-dive events on dive profiles nor does it show the off-gassing data. It has an option to display "inter-dive" events and does so similar to a dive. I've not been interested in the data, except to find out what it was. I've been thinking of libdivecomputer in the context of Subsurface which has no mechanism to show inter-dive events or data. I think libdivecomputer would need some other mechanism to present this data and if only the Cochran does this it may not make sense to spend the time.
That's also my impression.
But remember that even if libdivecomputer itself doesn't use this information nor provides an api to parse it, there might be someone out there who is interested in the raw data. It's perfectly possible to build a cochran specific application that use libdivecomputer only for downloading the data, and does the parsing on its own. Probably not a very likely scenario, but not impossible. Btw, that's also the reason why libdivecomputer always provides access to the raw data!
Note that if we now strip this info, and change our minds later one, then it might be too late. Changing the data format will break backwards compatibility.
A pointer of FFFFFFFF means the end of dive was never logged. I had a problem with my DC where it would reboot not logging the end of dive. The code tries to salvage the dive logs.
Can you recover some of the samples? Based on what I've seen so far (see my long table) it makes no sense to try to recover the samples, because it looks like at least part of the data you are recovering are the pre-dive events of another dive. That certainly doesn't look right.
Look for example at dive #79:
78 0014b8f3 0014b94a 0014ef9d 87 1800 79 0014f6a5 0014f70d ffffffff 104 1800 80 0014f6a5 0015043c 00151442 3479 1373862
Your code to guess the end pointer will replace ffffffff with 0015043c, which is the start of the next dive (#80). So for dive #79 you take the range 0014f70d to 0015043c as the sample data. But that memory range is also the last part of the pre-dive events for dive #80. This overlap doesn't make any sense to me.
You can see several of those starting about there. The battery was low and the DC was starting to exhibit the crash problem so I was intentionally invoking it about that time so I could be confident in describing the problem.
Take a look at
95 001627c7 0016447b ffffffff 7348 1451976 96 001627c7 0016463c 001661b3 7797 1451976
That 7348 byte of samples on dive 95 was a 40 minute dive in Tobermory, Canada followed by another about that length. Both imported mostly correctly. I'd rather have that flawed dive profile than nothing.
I'm not sure what dive 79 was, but I figured the user would rather see corrupt dive imported than none so s/he could make a choice.
If you look at dive #96, then there is 7797 bytes of pre-dive events in the range 001627c7-0016463c. But 7348 of those bytes are also the pre-dive events of dive #96 (range 001627c7-0016447b). That's what puzzles me. How can the pre-dive events of two dives overlap? That simply doesn't make any sense to me.
If you then guess the end address of dive #95 to be the start address of dive #96, then the samples of dive #95 end up as somewhere in the pre-dive data of dive #96. That's just weird.
The only explanation that I can come up with is that the dive computer started writing dive #95. Then it failed before the dive was finished (e.g. battery empty, reboot or whatever) and the end pointer didn't get written correctly. Hence the ffffffff value. But the end-of-profile pointer is probably only updated after the dive is finished. Hence the pre-dive pointer of the next dive retains the same value. I suspect that the current write position is somehow preserved and thus the next dive will start from there again. But this is of course just guessing.
I understand why you're trying to salvage those corrupt dives (in the sense that having something is better than nothing at all), but I worry that this will complicate the code or produce bogus data (which is even worse). When I look at some of those salvaged dives, that seems to be the case already. They end abrupt, which is probably normal. But there appears to be some amount of "random" data (e.g. spikes in the profile) too. And that's not good. Maybe this is just a bug in the parsing that we can fix? I simply don't know at this point.
What does the Cochran application do with those corrupt dives? Does it show them? With or without a profile?
I've been holding off asking Cochran again for help. They previously refused citing concern that open source code would reveal too much about their algorithm. Now that the code exists I wonder if they would be willing to help by telling me how to determine model and memory configuration, or if they will demand that I stop.. It's also the same reason I've been trying to respect their privacy by not reading anything but my data.
That's their usual argument. The truth is that I've never encountered a device where you can download anything worth protecting (e.g. firmware code, decompression algorithm). All you can access is the memory containing the dives.
Have you had any success in changing their minds? Do you have any good examples of DC manufacturers that have been open that I could use as examples when talking to Cochran next?
My experience so far is that either they are open minded (HW, Reefnet, Atomics, Shearwater, etc) or not. Convincing them appears to be rather difficult :-(
Jef
On Tue, Jan 13, 2015 at 8:06 AM, Jef Driesen jef@libdivecomputer.org wrote:
John,
I finally had some time again for another look at the cochran backend. This time I concentrated mainly on the data from your Commander. The main issue there is that the profile ringbuffer is completely full. The oldest logbook entries are pointing to profile data that has already been overwritten with newer dives. Because this is not handled properly, those older dives end up with completely bogus profile data.
There are 347 logbook entries, and the end-of-profile (eop) pointer contains the value 0x0009a20d. If I dump the pre, begin and end pointers again, I get this:
346 00098290 00098478 00099d5a 488 1203 345 00097d5f 00097d5f 00097ddf 0 1201 344 0009661a 000966d3 000978ae 185 1201 ... 225 0009b16e 0009b16e 0009cbce 0 1201 224 00099e22 00099e6f 0009acbd 77 1201 <- Contains the eop address. 223 00096e85 00098109 00099971 4740 1201 ... 78 0009b79a 0009c2a1 0009d247 2823 1205 77 00099eb5 00099edf 0009b2e9 42 1201 <- Yet another time here. 76 00098588 000985c4 00099a04 60 1201 ... 1 00021928 00021a00 00023420 216 1201 0 00020000 000201fd 00021477 509 1201
As you can see dive #224 contains the eop address, which means part of the profile belongs to dive #346. Thus all older dives no longer have valid profile data.
The easiest solution to fix this is to process dives from the oldest to the newest one (which you already do), and sum the profile length. Once the total length exceeds the size of the ringbuffer (e.g. the maximum amount of profile data), all older dives will no longer have profile data. For those dives you only have the logbook entry.
I have attached a patch with a proof of concept implementation. It's far from complete, but I think you'll get the idea. The patch also illustrate how you can nicely concentrate all the main logic for downloading the dives into the foreach function, instead of scattered over many functions. The main idea is that you try to separate the download and parsing logic as much as possible.
(More comments inline.)
Is the application going to expect to see dives from oldest to newest?
On 2014-12-22 21:58, John Van Ostrand wrote:
On Mon, Dec 22, 2014 at 10:38 AM, Jef Driesen jef@libdivecomputer.org
A memory dump should be independent from whatever logic we use for locating the dives. If you simply read everything from address 0 to the end, then you we can keep using those old memory dumps. If a device has lots of memory, then yes, it may take a while to download, but that's fine. Downloading a memory dump is not the normal use-case. It's intended for troubleshooting, and then you do want all the data. (For reference, downloading a full memory on a Suunto Vyper takes 10-15 minutes, and that's only 8KB of data!)
If we really don't know the total amount of memory, then we'll have to make an assumption. If that assumptions turns out to be wrong, we can always adjust it. But at least we always have all data up to a certain address.
I'll have to commit to knowing the end of memory then. Right now I've avoided stating the end explicitly because I'm not sure where it is. If all Cochran DCs return 0xFF when past memory I can be a little liberal and not crash.
Rather than assuming a value that might be too large, I would start with a relative small value and increase whenever there is evidence that there is indeed more memory. Since the last memory area contains the profile ringbuffer, this evidence will most likely be a ringbuffer pointer that points past our assumed end. That's something that's very easy to check.
Certainly it's easier if we know where memory starts and ends and I may be close to figuring that out. I've programmed this with a few ideals in mind. The first is to not dump a dive just because we aren't absolutely sure of the data. I argue this because I assume most end users of libdivecomputer (e.g. subsurface users) are not relying on the data to validate diving algorithms, they want to see a graphical representation of their dive. If they are like me they want to see most of their dive even if the last few samples are wrong. This explains why I'd like to see an attempt to retrieve corrupt dives and dives that wrap the sample buffer. In my experience it shows useful data. I'll get to my other ideals later.
The code is and was using the logic you describe above initially because I had limited experience with Cochran DCs. I use logic like that in cochran_get_sample_parms() where I interate through the dives to establish a low/high range of sample data. I round down or up as appropriate to 16k boundaries. 16K representing 91 min of dive samples on an EMC or 136 min on a Commander. I figured this was a reasonable number to capture most recreational dives that wrapped. Rounding both values is an attempt to capture samples for dives that have wrapped the DC buffer. On a new computer the first log entry should point to the start of memory however consider a DC where the log book has wrapped. The log entry with the lowest start memory location will probably not point to the start of sample memory. The start of sample memory will likely have sample data from the last dive that wrapped. The same is true for the end of sample memory. In other words the log book is not guaranteed to have a dive that points to the exact start or end of memory. We could ignore any dive with wrapping sample data, as you recommend but I think there's a good case for doing everything to present a dive to the end user.
The rounded sample-end-pointer was also used in cochran_guess_sample_end_address() prior to commit e4608460f246565a260e821f7869de668f79caa8. Guessing was done when the dive end wasn't recorded properly. The sample-end-pointer was also used to reassemble sample data for dives that wrapped the sample buffer. I should revisit this code to make sure it still works as intended.
It wasn't until recent changes to the dump function that I decided I would commit to a memory end value. You expressed concern about guessing the end of samples, specifically where they wrapped and so I removed guessing entirely and went with hard coded values. I also based this decision on what Cochran's dealer said and what I've seen with the DCs I've tested. I've mentioned before that the DC can be configured to use less memory, dealers have the ability to restrict and grant memory usage. I'm told that the dealer's software does this and as such I don't know if Cochran charges the dealer or if this is pure profit for the dealer. Keeping that in mind I haven't seen a Cochran with restricted memory, partly perhaps because I have only seen one where sample memory was exhausted and possibly because dealers (maybe this one in particular) just grant all the memory or customers order it enabled. Basically I became much more comfortable hard coding the sample-end-pointer based on my experience.
I can understand the point of view that we should fail when we are unsure of data and then expect the end user to file a bug report. This would likely get earlier reports of errors but in the interim many new users might be turned away when the software fails to work for them.
If we start with a value that is too large, and there is a device where the profile data crosses the ringbuffer end, then we will bogus profile data. I'll illustrate with an example. Assume for a momement that the real profile ringbuffer is in the range 0x200-0x400, and we (incorrectly) assume the end is at 0x800. If there happens to be a dive that crosses the ringbuffer end, for example from 0x380 to 0x280, then we'll read a first part from 0x380 to 0x800 and a second part from 0x200 to 0x280.
200 280 380 400 800 |xxxxx xxxxx| | <- Correct |yyyyy yyyyy|yyyyyyyyyyyyyy| <- Wrong
As you can see that first part contains an extra 0x400 bytes that shouldn't be there. We can't easily detect this kind of errors, other than the user noticing the resulting profile is completely bogus.
If we had estimated the amount of memory too small, for example at 0x300, then we would be able to tell immediately that the 0x380 pointer is outside the allowed range. Then we know we have to adjust the range.
Since the other pieces of data (id, misc and config) have a fixed size,
the structure for a good memory dump could be as simple as concatenating all pieces:
id 67 bytes misc 1500 bytes config 2x512 (emc) or 4x512 (commander) bytes memory variable length, memory starting at address
An alternative is to only include the memory part, and take care of the other three separately, by means of the vendor event. This is how it's done in the several other backends (e.g. oceanic and reefnet).
Considering I'm one of two users of this I'd like to have the extra data. And considering the person doing the dump knows the model it should be apparent how to parse the data so I don't need to make a generic format.
I'm not sure I understand your concerns here. Of course we want that extra data. To locate the dives, we need at a minimum the id and config0 packets. And depending on what we discover, we might need the others too at some point. I did only mention that instead of pre-pending the id, misc and config blocks to the memory dump, we could deliver them separately by means of the vendor event. That's how it's done in those other backends.
Do you think we should consider the pre-dive event as part of the dive
data? Based on your description, these pre-dive events do not look really interesting from a dive logbook point of view. I have no idea how the parser should deal with it, other than just ignoring them.
If the data were readily available I'd be interested in some of the data, like altitude and temp changes while travelling. However I've looked at some of the raw pre-dive data and I can't parse the event data. All that said I think this data is a passing interest.
I took my cue from Analyst. It doesn't show the pre-dive events on dive profiles nor does it show the off-gassing data. It has an option to display "inter-dive" events and does so similar to a dive. I've not been interested in the data, except to find out what it was. I've been thinking of libdivecomputer in the context of Subsurface which has no mechanism to show inter-dive events or data. I think libdivecomputer would need some other mechanism to present this data and if only the Cochran does this it may not make sense to spend the time.
That's also my impression.
But remember that even if libdivecomputer itself doesn't use this information nor provides an api to parse it, there might be someone out there who is interested in the raw data. It's perfectly possible to build a cochran specific application that use libdivecomputer only for downloading the data, and does the parsing on its own. Probably not a very likely scenario, but not impossible. Btw, that's also the reason why libdivecomputer always provides access to the raw data!
Note that if we now strip this info, and change our minds later one, then it might be too late. Changing the data format will break backwards compatibility.
I agree it would be nice to provide this data via API. If we decide to provide this data via the existing API it's going to mess up a lot of existing applications. The inter-dive events may not be so bad, since there are usually only a few events, keep in mind some dives have a lot. Also I don't know how to parse data from these events.
The off-gassing data, if sent out through the samples_foreach function are going to be incorporated into the aplications' dive profiles and appear as flat trailing lines in the profile. Temperature might be the only interesting sample and barely so.
Already other interesting data from the Cochran data stream is not provided through the API. Data like deepest stop time and tissue compartment loads are not accessible. To make off-gassing samples meaningful we should also present the in-dive TC loads and then we should indicate to the application that this is inter-dive data.
A pointer of FFFFFFFF means the end of dive was never logged. I had a
problem with my DC where it would reboot not logging the end of dive. The code tries to salvage the dive logs.
Can you recover some of the samples? Based on what I've seen so far (see my long table) it makes no sense to try to recover the samples, because it looks like at least part of the data you are recovering are the pre-dive events of another dive. That certainly doesn't look right.
Look for example at dive #79:
78 0014b8f3 0014b94a 0014ef9d 87 1800 79 0014f6a5 0014f70d ffffffff 104 1800 80 0014f6a5 0015043c 00151442 3479 1373862
Your code to guess the end pointer will replace ffffffff with 0015043c, which is the start of the next dive (#80). So for dive #79 you take the range 0014f70d to 0015043c as the sample data. But that memory range is also the last part of the pre-dive events for dive #80. This overlap doesn't make any sense to me.
You can see several of those starting about there. The battery was low and the DC was starting to exhibit the crash problem so I was intentionally invoking it about that time so I could be confident in describing the problem.
Take a look at
95 001627c7 0016447b ffffffff 7348 1451976 96 001627c7 0016463c 001661b3 7797 1451976
That 7348 byte of samples on dive 95 was a 40 minute dive in Tobermory, Canada followed by another about that length. Both imported mostly correctly. I'd rather have that flawed dive profile than nothing.
I'm not sure what dive 79 was, but I figured the user would rather see corrupt dive imported than none so s/he could make a choice.
If you look at dive #96, then there is 7797 bytes of pre-dive events in the
range 001627c7-0016463c. But 7348 of those bytes are also the pre-dive events of dive #96 (range 001627c7-0016447b). That's what puzzles me. How can the pre-dive events of two dives overlap? That simply doesn't make any sense to me.
If you then guess the end address of dive #95 to be the start address of dive #96, then the samples of dive #95 end up as somewhere in the pre-dive data of dive #96. That's just weird.
The only explanation that I can come up with is that the dive computer started writing dive #95. Then it failed before the dive was finished (e.g. battery empty, reboot or whatever) and the end pointer didn't get written correctly. Hence the ffffffff value. But the end-of-profile pointer is probably only updated after the dive is finished. Hence the pre-dive pointer of the next dive retains the same value. I suspect that the current write position is somehow preserved and thus the next dive will start from there again. But this is of course just guessing.
That's what I think too. given that flash can be slow and have wears with writes I expect it's logical to eliminate unnecessary writes. Based on what I've seen the Cochran code writes the first half of the log when the dive starts (e.g. deeper than 6ft). There are two end-points to a dive. The first is when the diver ascends to 3ft. I think at this point Cochran updates the sample-end-pointer in config0. Cochran then continues logging until the configured "Post dive surface interval minutes" (10 - 20 minutes) elapses. This is why you see 1800 bytes (10 min * 60 seconds * 3 samples/sec) between dive-end-pointer and the pre-dive-pointer. At any time prior to this elapsing a submersion to below 6 ft continues the dive. I think this is why these post-dive samples exist. After the elapsed interval, the end-dive log is written and now the pre-dive-pointer is known and can be updated in config0.
So although the pre-dive pointer is bogus on the dive after a corrupt dive, the dive sample data of the corrupt dive is intact but determining the end has been difficult. Now that I've thought through the Cochran logic, I should go over my logic to see if I can better determine the end of a corrupt dive.
I understand why you're trying to salvage those corrupt dives (in the sense
that having something is better than nothing at all), but I worry that this will complicate the code or produce bogus data (which is even worse). When I look at some of those salvaged dives, that seems to be the case already. They end abrupt, which is probably normal. But there appears to be some amount of "random" data (e.g. spikes in the profile) too. And that's not good. Maybe this is just a bug in the parsing that we can fix? I simply don't know at this point.
The code ins't too complicated because of it. A corrupt dive is known because of the FFFFFFFF dive-end-pointer. Knowing that we loop, exiting on a memory boundary, when we see a sample that doesn't make sense (0xFF), when we see normal end-dive events or normal start-dive events. If we don't find end-dive events and we stop because of start-dive events we may have processed some inter-dive events as sample data and I think that's why we see the spikes at the end. It's a few lines of code in the parse function plus the guess function and a few if-thens. Since corrupt dives don't have end-dive log data the code parses the samples for depth, temp and duration, etc.
Now that I've walked through the cochran logic and why it's writing sample pointers at certain times I might be able to do better guessing at end-dives.
What does the Cochran application do with those corrupt dives? Does it show them? With or without a profile?
Cochran refuses to display a corrupt dive. You also mention that the Commander shows some erratic dives, although in the context of reclaimed and overwritten sample data. There were a lot of Commander dives that looked corrupt in Analyst. I was happy that libdivecomputer/subsurface mirrored those corrupt dive profiles because it suggested the sample data was corrupt or that we were at least as capable as Analyst.
I've been holding off asking Cochran again for help. They previously
refused citing concern that open source code would reveal too much about their algorithm. Now that the code exists I wonder if they would be willing to help by telling me how to determine model and memory configuration, or if they will demand that I stop.. It's also the same reason I've been trying to respect their privacy by not reading anything but my data.
That's their usual argument. The truth is that I've never encountered a device where you can download anything worth protecting (e.g. firmware code, decompression algorithm). All you can access is the memory containing the dives.
Have you had any success in changing their minds? Do you have any good examples of DC manufacturers that have been open that I could use as examples when talking to Cochran next?
My experience so far is that either they are open minded (HW, Reefnet, Atomics, Shearwater, etc) or not. Convincing them appears to be rather difficult :-(
On Fri, Jan 16, 2015 at 12:14 PM, John Van Ostrand john@vanostrand.com wrote:
On Tue, Jan 13, 2015 at 8:06 AM, Jef Driesen jef@libdivecomputer.org wrote:
John,
I finally had some time again for another look at the cochran backend. This time I concentrated mainly on the data from your Commander. The main issue there is that the profile ringbuffer is completely full. The oldest logbook entries are pointing to profile data that has already been overwritten with newer dives. Because this is not handled properly, those older dives end up with completely bogus profile data.
There are 347 logbook entries, and the end-of-profile (eop) pointer contains the value 0x0009a20d. If I dump the pre, begin and end pointers again, I get this:
346 00098290 00098478 00099d5a 488 1203 345 00097d5f 00097d5f 00097ddf 0 1201 344 0009661a 000966d3 000978ae 185 1201 ... 225 0009b16e 0009b16e 0009cbce 0 1201 224 00099e22 00099e6f 0009acbd 77 1201 <- Contains the eop address. 223 00096e85 00098109 00099971 4740 1201 ... 78 0009b79a 0009c2a1 0009d247 2823 1205 77 00099eb5 00099edf 0009b2e9 42 1201 <- Yet another time here. 76 00098588 000985c4 00099a04 60 1201 ... 1 00021928 00021a00 00023420 216 1201 0 00020000 000201fd 00021477 509 1201
As you can see dive #224 contains the eop address, which means part of the profile belongs to dive #346. Thus all older dives no longer have valid profile data.
The easiest solution to fix this is to process dives from the oldest to the newest one (which you already do), and sum the profile length. Once the total length exceeds the size of the ringbuffer (e.g. the maximum amount of profile data), all older dives will no longer have profile data. For those dives you only have the logbook entry.
I have attached a patch with a proof of concept implementation. It's far from complete, but I think you'll get the idea. The patch also illustrate how you can nicely concentrate all the main logic for downloading the dives into the foreach function, instead of scattered over many functions. The main idea is that you try to separate the download and parsing logic as much as possible.
(More comments inline.)
Is the application going to expect to see dives from oldest to newest?
On 2014-12-22 21:58, John Van Ostrand wrote:
On Mon, Dec 22, 2014 at 10:38 AM, Jef Driesen jef@libdivecomputer.org
A memory dump should be independent from whatever logic we use for locating the dives. If you simply read everything from address 0 to the end, then you we can keep using those old memory dumps. If a device has lots of memory, then yes, it may take a while to download, but that's fine. Downloading a memory dump is not the normal use-case. It's intended for troubleshooting, and then you do want all the data. (For reference, downloading a full memory on a Suunto Vyper takes 10-15 minutes, and that's only 8KB of data!)
If we really don't know the total amount of memory, then we'll have to make an assumption. If that assumptions turns out to be wrong, we can always adjust it. But at least we always have all data up to a certain address.
I'll have to commit to knowing the end of memory then. Right now I've avoided stating the end explicitly because I'm not sure where it is. If all Cochran DCs return 0xFF when past memory I can be a little liberal and not crash.
Rather than assuming a value that might be too large, I would start with a relative small value and increase whenever there is evidence that there is indeed more memory. Since the last memory area contains the profile ringbuffer, this evidence will most likely be a ringbuffer pointer that points past our assumed end. That's something that's very easy to check.
Certainly it's easier if we know where memory starts and ends and I may be close to figuring that out. I've programmed this with a few ideals in mind. The first is to not dump a dive just because we aren't absolutely sure of the data. I argue this because I assume most end users of libdivecomputer (e.g. subsurface users) are not relying on the data to validate diving algorithms, they want to see a graphical representation of their dive. If they are like me they want to see most of their dive even if the last few samples are wrong. This explains why I'd like to see an attempt to retrieve corrupt dives and dives that wrap the sample buffer. In my experience it shows useful data. I'll get to my other ideals later.
The code is and was using the logic you describe above initially because I had limited experience with Cochran DCs. I use logic like that in cochran_get_sample_parms() where I interate through the dives to establish a low/high range of sample data. I round down or up as appropriate to 16k boundaries. 16K representing 91 min of dive samples on an EMC or 136 min on a Commander. I figured this was a reasonable number to capture most recreational dives that wrapped. Rounding both values is an attempt to capture samples for dives that have wrapped the DC buffer. On a new computer the first log entry should point to the start of memory however consider a DC where the log book has wrapped. The log entry with the lowest start memory location will probably not point to the start of sample memory. The start of sample memory will likely have sample data from the last dive that wrapped. The same is true for the end of sample memory. In other words the log book is not guaranteed to have a dive that points to the exact start or end of memory. We could ignore any dive with wrapping sample data, as you recommend but I think there's a good case for doing everything to present a dive to the end user.
The rounded sample-end-pointer was also used in cochran_guess_sample_end_address() prior to commit e4608460f246565a260e821f7869de668f79caa8. Guessing was done when the dive end wasn't recorded properly. The sample-end-pointer was also used to reassemble sample data for dives that wrapped the sample buffer. I should revisit this code to make sure it still works as intended.
It wasn't until recent changes to the dump function that I decided I would commit to a memory end value. You expressed concern about guessing the end of samples, specifically where they wrapped and so I removed guessing entirely and went with hard coded values. I also based this decision on what Cochran's dealer said and what I've seen with the DCs I've tested. I've mentioned before that the DC can be configured to use less memory, dealers have the ability to restrict and grant memory usage. I'm told that the dealer's software does this and as such I don't know if Cochran charges the dealer or if this is pure profit for the dealer. Keeping that in mind I haven't seen a Cochran with restricted memory, partly perhaps because I have only seen one where sample memory was exhausted and possibly because dealers (maybe this one in particular) just grant all the memory or customers order it enabled. Basically I became much more comfortable hard coding the sample-end-pointer based on my experience.
I can understand the point of view that we should fail when we are unsure of data and then expect the end user to file a bug report. This would likely get earlier reports of errors but in the interim many new users might be turned away when the software fails to work for them.
If we start with a value that is too large, and there is a device where the profile data crosses the ringbuffer end, then we will bogus profile data. I'll illustrate with an example. Assume for a momement that the real profile ringbuffer is in the range 0x200-0x400, and we (incorrectly) assume the end is at 0x800. If there happens to be a dive that crosses the ringbuffer end, for example from 0x380 to 0x280, then we'll read a first part from 0x380 to 0x800 and a second part from 0x200 to 0x280.
200 280 380 400 800 |xxxxx xxxxx| | <- Correct |yyyyy yyyyy|yyyyyyyyyyyyyy| <- Wrong
As you can see that first part contains an extra 0x400 bytes that shouldn't be there. We can't easily detect this kind of errors, other than the user noticing the resulting profile is completely bogus.
If we had estimated the amount of memory too small, for example at 0x300, then we would be able to tell immediately that the 0x380 pointer is outside the allowed range. Then we know we have to adjust the range.
Since the other pieces of data (id, misc and config) have a fixed size,
the structure for a good memory dump could be as simple as concatenating all pieces:
id 67 bytes misc 1500 bytes config 2x512 (emc) or 4x512 (commander) bytes memory variable length, memory starting at address
An alternative is to only include the memory part, and take care of the other three separately, by means of the vendor event. This is how it's done in the several other backends (e.g. oceanic and reefnet).
Considering I'm one of two users of this I'd like to have the extra data. And considering the person doing the dump knows the model it should be apparent how to parse the data so I don't need to make a generic format.
I'm not sure I understand your concerns here. Of course we want that extra data. To locate the dives, we need at a minimum the id and config0 packets. And depending on what we discover, we might need the others too at some point. I did only mention that instead of pre-pending the id, misc and config blocks to the memory dump, we could deliver them separately by means of the vendor event. That's how it's done in those other backends.
Do you think we should consider the pre-dive event as part of the dive
data? Based on your description, these pre-dive events do not look really interesting from a dive logbook point of view. I have no idea how the parser should deal with it, other than just ignoring them.
If the data were readily available I'd be interested in some of the data, like altitude and temp changes while travelling. However I've looked at some of the raw pre-dive data and I can't parse the event data. All that said I think this data is a passing interest.
I took my cue from Analyst. It doesn't show the pre-dive events on dive profiles nor does it show the off-gassing data. It has an option to display "inter-dive" events and does so similar to a dive. I've not been interested in the data, except to find out what it was. I've been thinking of libdivecomputer in the context of Subsurface which has no mechanism to show inter-dive events or data. I think libdivecomputer would need some other mechanism to present this data and if only the Cochran does this it may not make sense to spend the time.
That's also my impression.
But remember that even if libdivecomputer itself doesn't use this information nor provides an api to parse it, there might be someone out there who is interested in the raw data. It's perfectly possible to build a cochran specific application that use libdivecomputer only for downloading the data, and does the parsing on its own. Probably not a very likely scenario, but not impossible. Btw, that's also the reason why libdivecomputer always provides access to the raw data!
Note that if we now strip this info, and change our minds later one, then it might be too late. Changing the data format will break backwards compatibility.
I agree it would be nice to provide this data via API. If we decide to provide this data via the existing API it's going to mess up a lot of existing applications. The inter-dive events may not be so bad, since there are usually only a few events, keep in mind some dives have a lot. Also I don't know how to parse data from these events.
The off-gassing data, if sent out through the samples_foreach function are going to be incorporated into the aplications' dive profiles and appear as flat trailing lines in the profile. Temperature might be the only interesting sample and barely so.
Already other interesting data from the Cochran data stream is not provided through the API. Data like deepest stop time and tissue compartment loads are not accessible. To make off-gassing samples meaningful we should also present the in-dive TC loads and then we should indicate to the application that this is inter-dive data.
A pointer of FFFFFFFF means the end of dive was never logged. I had a
problem with my DC where it would reboot not logging the end of dive. The code tries to salvage the dive logs.
Can you recover some of the samples? Based on what I've seen so far (see my long table) it makes no sense to try to recover the samples, because it looks like at least part of the data you are recovering are the pre-dive events of another dive. That certainly doesn't look right.
Look for example at dive #79:
78 0014b8f3 0014b94a 0014ef9d 87 1800 79 0014f6a5 0014f70d ffffffff 104 1800 80 0014f6a5 0015043c 00151442 3479 1373862
Your code to guess the end pointer will replace ffffffff with 0015043c, which is the start of the next dive (#80). So for dive #79 you take the range 0014f70d to 0015043c as the sample data. But that memory range is also the last part of the pre-dive events for dive #80. This overlap doesn't make any sense to me.
You can see several of those starting about there. The battery was low and the DC was starting to exhibit the crash problem so I was intentionally invoking it about that time so I could be confident in describing the problem.
Take a look at
95 001627c7 0016447b ffffffff 7348 1451976 96 001627c7 0016463c 001661b3 7797 1451976
That 7348 byte of samples on dive 95 was a 40 minute dive in Tobermory, Canada followed by another about that length. Both imported mostly correctly. I'd rather have that flawed dive profile than nothing.
I'm not sure what dive 79 was, but I figured the user would rather see corrupt dive imported than none so s/he could make a choice.
If you look at dive #96, then there is 7797 bytes of pre-dive events in
the range 001627c7-0016463c. But 7348 of those bytes are also the pre-dive events of dive #96 (range 001627c7-0016447b). That's what puzzles me. How can the pre-dive events of two dives overlap? That simply doesn't make any sense to me.
If you then guess the end address of dive #95 to be the start address of dive #96, then the samples of dive #95 end up as somewhere in the pre-dive data of dive #96. That's just weird.
The only explanation that I can come up with is that the dive computer started writing dive #95. Then it failed before the dive was finished (e.g. battery empty, reboot or whatever) and the end pointer didn't get written correctly. Hence the ffffffff value. But the end-of-profile pointer is probably only updated after the dive is finished. Hence the pre-dive pointer of the next dive retains the same value. I suspect that the current write position is somehow preserved and thus the next dive will start from there again. But this is of course just guessing.
That's what I think too. given that flash can be slow and have wears with writes I expect it's logical to eliminate unnecessary writes. Based on what I've seen the Cochran code writes the first half of the log when the dive starts (e.g. deeper than 6ft). There are two end-points to a dive. The first is when the diver ascends to 3ft. I think at this point Cochran updates the sample-end-pointer in config0. Cochran then continues logging until the configured "Post dive surface interval minutes" (10 - 20 minutes) elapses. This is why you see 1800 bytes (10 min * 60 seconds * 3 samples/sec) between dive-end-pointer and the pre-dive-pointer. At any time prior to this elapsing a submersion to below 6 ft continues the dive. I think this is why these post-dive samples exist. After the elapsed interval, the end-dive log is written and now the pre-dive-pointer is known and can be updated in config0.
So although the pre-dive pointer is bogus on the dive after a corrupt dive, the dive sample data of the corrupt dive is intact but determining the end has been difficult. Now that I've thought through the Cochran logic, I should go over my logic to see if I can better determine the end of a corrupt dive.
I understand why you're trying to salvage those corrupt dives (in the
sense that having something is better than nothing at all), but I worry that this will complicate the code or produce bogus data (which is even worse). When I look at some of those salvaged dives, that seems to be the case already. They end abrupt, which is probably normal. But there appears to be some amount of "random" data (e.g. spikes in the profile) too. And that's not good. Maybe this is just a bug in the parsing that we can fix? I simply don't know at this point.
The code ins't too complicated because of it. A corrupt dive is known because of the FFFFFFFF dive-end-pointer. Knowing that we loop, exiting on a memory boundary, when we see a sample that doesn't make sense (0xFF), when we see normal end-dive events or normal start-dive events. If we don't find end-dive events and we stop because of start-dive events we may have processed some inter-dive events as sample data and I think that's why we see the spikes at the end. It's a few lines of code in the parse function plus the guess function and a few if-thens. Since corrupt dives don't have end-dive log data the code parses the samples for depth, temp and duration, etc.
Now that I've walked through the cochran logic and why it's writing sample pointers at certain times I might be able to do better guessing at end-dives.
I've looked into three of the corrupt dives on the EMC, dives 4, 6, 13 and 14. Given the existing logic, the strange data at the end of those dive profiles is the inter-dive events before the next dive. There seem to be three events 0x10, 00, and 02. This is a crash and reboot. The data looks like this:
dive 4/5 00004dc0 40 69 00 42 91 09 41 69 0a 43 93 0a 42 69 09 a8 |@i.B..Ai.C..Bi..| 00004dd0 c0 42 80 08 02 68 07 42 80 06 42 68 05 40 80 04 |.B...h.B..Bh.@ ..| 00004de0 42 68 04 42 80 03 40 68 02 40 80 02 02 68 01 40 |Bh.B..@h. @...h.@| 00004df0 80 01 01 68 00 41 80 00 40 68 00 01 80 00 41 68 |...h.A..@h. ...Ah| 00004e00 10 a4 70 36 2a 16 0e 11 0a 06 0e b5 02 00 00 00 |..p6*...........| 00004e10 00 c6 48 00 00 00 a5 70 36 2a 17 0e 11 0a 06 0e |..H....p6*......| 00004e20 b5 02 00 00 00 00 c6 48 02 ab 70 36 2a 1d 0e 11 |.......H..p6*...| 00004e30 0a 06 0e f9 02 00 00 00 00 c6 48 00 e3 f3 40 02 |..........H...@ .| 00004e40 00 01 69 00 02 10 00 40 69 00 41 12 00 41 69 00 |..i....@i.A. .Ai.|
dive 6/7 000073c0 6d 00 02 0e 04 01 6d 03 40 0e 03 01 6c 02 02 0e |m.....m.@ ...l...| 000073d0 02 01 6c 01 02 14 06 02 6c 09 01 18 0c 01 6c 0f |..l.....l.....l.| 000073e0 02 16 11 40 6c 10 02 13 0e 01 6c 0c 40 11 09 40 |...@l.....l.@ ..@| 000073f0 6c 00 40 0b 00 40 6c 00 41 83 00 40 6c 00 40 84 |l.@..@l.A..@l. @.| 00007400 10 8b 80 36 2a 0d 16 12 0a 06 0e b3 02 00 00 00 |...6*...........| 00007410 00 02 49 00 00 00 8c 80 36 2a 0e 16 12 0a 06 0e |..I.....6*......| 00007420 b3 02 00 00 00 00 02 49 02 92 80 36 2a 14 16 12 |.......I...6*...| 00007430 0a 06 0e f6 02 00 00 00 00 3d 49 00 e3 f3 40 08 |.........=I...@ .| 00007440 00 02 6b 00 01 15 00 01 6b 00 01 1b 00 40 6b 00 |..k.....k....@k.|
dive 13/14 00014de0 01 40 69 01 40 0d ff 01 69 ff 01 08 00 41 69 00 |.@i.@ ...i....Ai.| 00014df0 01 05 07 40 69 09 40 03 0b 41 69 0d 41 83 0f 41 |...@i.@ ..Ai.A..A| 00014e00 10 a4 b4 3f 2a 1a 39 11 11 06 0e b7 02 00 00 00 |...?*.9.........| 00014e10 00 79 49 00 00 00 a5 b4 3f 2a 1b 39 11 11 06 0e |.yI.....?*.9....| 00014e20 b7 02 00 00 00 00 79 49 02 ab b4 3f 2a 21 39 11 |......yI...?*!9.| 00014e30 11 06 0e fb 02 00 00 00 00 3d 49 00 e3 f3 40 00 |.........=I...@ .| 00014e40 00 02 6a 00 42 07 00 40 6a 00 41 81 00 40 6a 00 |..j.B..@j.A. .@j.| 00014e50 01 83 00 03 6a 00 01 11 00 40 6a 00 42 12 00 43 |....j....@j.B. .C|
dive 14/15 000157e0 00 40 80 00 02 69 01 01 80 02 01 69 03 01 80 04 |.@...i.....i....| 000157f0 41 69 04 40 80 04 41 69 04 40 80 00 41 69 00 41 |Ai.@..Ai.@ ..Ai.A| 00015800 10 97 b8 3f 2a 11 0e 12 11 06 0e b4 02 00 00 00 |...?*...........| 00015810 00 3d 49 00 00 00 98 b8 3f 2a 12 0e 12 11 06 0e |.=I.....?*......| 00015820 b4 02 00 00 00 00 3d 49 02 9e b8 3f 2a 18 0e 12 |......=I...?*...| 00015830 11 06 0e fb 02 00 00 00 00 3d 49 00 e3 f3 40 07 |.........=I...@ .| 00015840 00 07 6d 00 02 2a 00 03 6d 00 02 38 00 01 6d 00 |..m..*..m..8..m.|
Look for the "e3 f3" bytes in the excerpts, that is the beginning of the next dive. If you back track you'll see 3 interdive events. You can use the table of sizes in the source code to isolate the events but it's easier to look for the time stamp which ends with 2a and is four bytes in total. The byte prior to the time stamp is the event code.
For example in sample 14/15 the breakdown is:
Dive 14 samples: 000157e0 00 40 80 00 02 69 01 01 80 02 01 69 03 01 80 04 |.@...i.....i....| 000157f0 41 69 04 40 80 04 41 69 04 40 80 00 41 69 00 41 |Ai.@..Ai.@ ..Ai.A|
Interdive events
Event 0x10 00015800 10 97 b8 3f 2a 11 0e 12 11 06 0e b4 02 00 00 00 |...?*...........| 00015810 00 3d 49 00 00
Event 00 00 98 b8 3f 2a 12 0e 12 11 06 0e |.=I.....?*......| 00015820 b4 02 00 00 00 00 3d 49
Event 02 02 9e b8 3f 2a 18 0e 12 |......=I...?*...| 00015830 11 06 0e fb 02 00 00 00 00 3d 49 00
Dive 15 samples: e3 f3 40 07 |.........=I...@.| 00015840 00 07 6d 00 02 2a 00 03 6d 00 02 38 00 01 6d 00 |..m..*..m..8..m.|
I think this pattern may be specific to the EMC and specific to the failure event, a crash/reboot in this case. I could cycle through the table of inter-dive events and to see if the byte code matches and test to see if the time stamp is reasonable. That should work to find the actual end of previous dive.
What does the Cochran application do with those corrupt dives? Does it show
them? With or without a profile?
Cochran refuses to display a corrupt dive. You also mention that the Commander shows some erratic dives, although in the context of reclaimed and overwritten sample data. There were a lot of Commander dives that looked corrupt in Analyst. I was happy that libdivecomputer/subsurface mirrored those corrupt dive profiles because it suggested the sample data was corrupt or that we were at least as capable as Analyst.
I've been holding off asking Cochran again for help. They previously
refused citing concern that open source code would reveal too much about their algorithm. Now that the code exists I wonder if they would be willing to help by telling me how to determine model and memory configuration, or if they will demand that I stop.. It's also the same reason I've been trying to respect their privacy by not reading anything but my data.
That's their usual argument. The truth is that I've never encountered a device where you can download anything worth protecting (e.g. firmware code, decompression algorithm). All you can access is the memory containing the dives.
Have you had any success in changing their minds? Do you have any good examples of DC manufacturers that have been open that I could use as examples when talking to Cochran next?
My experience so far is that either they are open minded (HW, Reefnet, Atomics, Shearwater, etc) or not. Convincing them appears to be rather difficult :-(
-- John Van Ostrand At large on sabbatical
On 16-01-15 18:14, John Van Ostrand wrote:
Is the application going to expect to see dives from oldest to newest?
Yes! This is very important for the "download only new dives" feature. If the application receives the dives in newest first order, it can stop the download as soon as it recognizes a dive that it already has. Libdivecomputer's internal implementation (using the fingerprint feature) also requires this.
That reminds me of a somewhat related issue. Currently the entire profile ringbuffer (or at least the part that contains dives) gets downloaded with a single large request. For applications not using libdivecomputer' built-in fingerprint feature, that means we will first download ALL dives, and then throw away the ones the application has already downloaded before. That's not really optimal. Especially because downloading only the new dives happens to be our most common use-case.
This could be improved by downloading the profile data with multiple smaller requests. For example one request per dive profile (or two request if the dive happens to cross the ringbuffer end). I believe this is technically possible with the Cochran protocol. Multiple request will probably add some extra overhead when doing a complete download. But I doubt that outweighs the benefits in the normal case of downloading only a few dives.
With multiple smaller requests it's also possible to cancel a download. So that's an extra advantage that's nearly impossible to implement correctly with just one large request.
Rather than assuming a value that might be too large, I would start with a relative small value and increase whenever there is evidence that there is indeed more memory. Since the last memory area contains the profile ringbuffer, this evidence will most likely be a ringbuffer pointer that points past our assumed end. That's something that's very easy to check.
Certainly it's easier if we know where memory starts and ends and I may be close to figuring that out. I've programmed this with a few ideals in mind. The first is to not dump a dive just because we aren't absolutely sure of the data. I argue this because I assume most end users of libdivecomputer (e.g. subsurface users) are not relying on the data to validate diving algorithms, they want to see a graphical representation of their dive. If they are like me they want to see most of their dive even if the last few samples are wrong. This explains why I'd like to see an attempt to retrieve corrupt dives and dives that wrap the sample buffer. In my experience it shows useful data. I'll get to my other ideals later.
We're not dropping any dives, only informing the caller that we did encounter data that we (currently) can't handle correctly. That's something completely different!
Relying on guessing is always error-prone and unreliable. Just one wrong sample is enough to render the recovery of a corrupted dive completely useless. For example if there is one depth sample completely out of range, then the application will scale the profile accordingly. The result is that the good part of the profile will be squeezed completely, and you won't be able to see anything useful at all. And things can get much worse too (see examples below).
[...]
I can understand the point of view that we should fail when we are unsure of data and then expect the end user to file a bug report. This would likely get earlier reports of errors but in the interim many new users might be turned away when the software fails to work for them.
If you ask me, silently ignoring errors is an order of magnitude worse than gracefully failing with an error. Remember that if we ignore the error and try to proceed anyway, there is absolutely no guarantee that the result is correct. If you're lucky then the result is completely bogus and the user will notice immediately (e.g. unrealistic profiles). Smaller mistakes may go unnoticed at first, but if you realize after a while it might be already too late for re-downloading. And last but not least there is also the possibility of crashing the application.
I can give plenty examples of real life scenarios that I encountered during libdivecomputer development. Two examples: If a ringbuffer pointer is out of range, and you try to use it anyway with code that expects a pointer in a certain range, then you'll likely end up with a buffer overflow and crash the application. When parsing "random" data, you can easily end up with depths (or times) that are way beyond the normal range (e.g. order of several kilometres). Definitely a challenge for the decompression algorithm in the application. (Hint: I ran out of patience and just killed it.)
Anyway, my experience is that being very strict pays off in the long run. For the end-user, it may be a bit annoying initially, but it's certainly not making things worse.
Note that I have the impression we're mixing two unrelated issues in this discussion: (1) problems associated with the ringbuffer end, and (2) recovery of corrupt dives.
For issue #1, this is all about finding the correct ringbuffer begin/end address. Once we have those values, the problem is solved and there is absolutely no need for any guessing. That's why I insist on being very strict there. It will help us to find the correct values faster.
For issue #2 the situation is very different. Because we're dealing with corrupt data here, this will not be as easy as finding some magic values. Maybe it's indeed possible to recover some data without too much serious issues. In that case I'm fine doing that, but if not I lean towards sticking to the logbook header only and dropping the partial profile. But let's first see how far we get.
But remember that even if libdivecomputer itself doesn't use this information nor provides an api to parse it, there might be someone out there who is interested in the raw data. It's perfectly possible to build a cochran specific application that use libdivecomputer only for downloading the data, and does the parsing on its own. Probably not a very likely scenario, but not impossible. Btw, that's also the reason why libdivecomputer always provides access to the raw data!
Note that if we now strip this info, and change our minds later one, then it might be too late. Changing the data format will break backwards compatibility.
I agree it would be nice to provide this data via API. If we decide to provide this data via the existing API it's going to mess up a lot of existing applications. The inter-dive events may not be so bad, since there are usually only a few events, keep in mind some dives have a lot. Also I don't know how to parse data from these events.
If we consider the inter-dive events to be part of the dive, and include it in our raw dive data, that doesn't mean the parser has to do anything with it. The parser can simply ignore it if it doesn't fit well into the api. But it's still there, in case an applications wants to look at the raw binary data.
Just to make myself clear: For the inter-dive events I'm not sure whether we should include it or not. The libdivecomputer api is based around the concept of dives. Inter-dive events don't really fit well into that model. They are not really part of the dive that comes before or after it. We don't have any way to deliver non-dive data to the application either. So the only options I see at the moment, are to include it as a pre-dive data, or to discard it. Currently my preference is more towards the latter option, but at this point I want to leave all options open.
The off-gassing data, if sent out through the samples_foreach function are going to be incorporated into the aplications' dive profiles and appear as flat trailing lines in the profile. Temperature might be the only interesting sample and barely so.
I doubt this is off-gassing data. It's an artifact of how dive computers determine the end of a dive. They typically consider a dive after spending X amount of time above a certain depth threshold Y. During this period they keep recording because you might descend again and resume the dive. When the dive is really over, some dive computers keep this tail intact, others rewind and discard it. It looks like the cochran has some sort of compromise: It leaves the tail intact, but also indicates at which point it considered the dive finished.
I would definitely include this data. Discarding the tail should be done by the application, where it can be configured by the user. (I believe subsurface already support this.) Some users will want to see it, and others don't.
Already other interesting data from the Cochran data stream is not provided through the API. Data like deepest stop time and tissue compartment loads are not accessible. To make off-gassing samples meaningful we should also present the in-dive TC loads and then we should indicate to the application that this is inter-dive data.
The libdivecomputer api supports deco/ndl info, but not tissue loadings. The latter are too device specific. To be able to interpret tissue loadings, you need to know the decompression algorithm and all its parameter. That's info we simply don't have (and likely never will). Implementing a decompression algorithm in the application makes a lot more sense.
The only explanation that I can come up with is that the dive computer started writing dive #95. Then it failed before the dive was finished (e.g. battery empty, reboot or whatever) and the end pointer didn't get written correctly. Hence the ffffffff value. But the end-of-profile pointer is probably only updated after the dive is finished. Hence the pre-dive pointer of the next dive retains the same value. I suspect that the current write position is somehow preserved and thus the next dive will start from there again. But this is of course just guessing.
That's what I think too. given that flash can be slow and have wears with writes I expect it's logical to eliminate unnecessary writes. Based on what I've seen the Cochran code writes the first half of the log when the dive starts (e.g. deeper than 6ft). There are two end-points to a dive. The first is when the diver ascends to 3ft. I think at this point Cochran updates the sample-end-pointer in config0. Cochran then continues logging until the configured "Post dive surface interval minutes" (10 - 20 minutes) elapses. This is why you see 1800 bytes (10 min * 60 seconds * 3 samples/sec) between dive-end-pointer and the pre-dive-pointer. At any time prior to this elapsing a submersion to below 6 ft continues the dive. I think this is why these post-dive samples exist. After the elapsed interval, the end-dive log is written and now the pre-dive-pointer is known and can be updated in config0.
So although the pre-dive pointer is bogus on the dive after a corrupt dive, the dive sample data of the corrupt dive is intact but determining the end has been difficult. Now that I've thought through the Cochran logic, I should go over my logic to see if I can better determine the end of a corrupt dive.
Makes sense.
Excellent info about that 1800 (emc) or 1201 (commander) bytes block. Initially I assumed this was some fixed size trailer, and I had no explanation for the fact that on some dives it's a few bytes larger. But you are right. It's indeed the tail of the dive profile, and those few extra bytes are events. More pieces start to fall into place!
I understand why you're trying to salvage those corrupt dives (in the sense that having something is better than nothing at all), but I worry that this will complicate the code or produce bogus data (which is even worse). When I look at some of those salvaged dives, that seems to be the case already. They end abrupt, which is probably normal. But there appears to be some amount of "random" data (e.g. spikes in the profile) too. And that's not good. Maybe this is just a bug in the parsing that we can fix? I simply don't know at this point.
The code ins't too complicated because of it. A corrupt dive is known because of the FFFFFFFF dive-end-pointer. Knowing that we loop, exiting on a memory boundary, when we see a sample that doesn't make sense (0xFF), when we see normal end-dive events or normal start-dive events. If we don't find end-dive events and we stop because of start-dive events we may have processed some inter-dive events as sample data and I think that's why we see the spikes at the end. It's a few lines of code in the parse function plus the guess function and a few if-thens. Since corrupt dives don't have end-dive log data the code parses the samples for depth, temp and duration, etc.
Now that I've walked through the cochran logic and why it's writing sample pointers at certain times I might be able to do better guessing at end-dives.
Well the code is indeed not really complicated, but it's also not without flaws. Those spikes at the end are examples of that. Fixing those issues might turn out harder than expected. That's my main concern.
I suggest we first take care of the non-corrupt dives, and get the backend into a state where we can integrate it into libdivecomputer. Once we have that, we take a step back and try to come up with a solution to deal with those corrupt dives too. What do you think?
What does the Cochran application do with those corrupt dives? Does it show them? With or without a profile?
Cochran refuses to display a corrupt dive.
So if we're not able to recover corrupt dives, we're at least not doing worse then Analyst. I think that's already a good starting point. If we manage to do better then great.
You also mention that the Commander shows some erratic dives, although in the context of reclaimed and overwritten sample data. There were a lot of Commander dives that looked corrupt in Analyst. I was happy that libdivecomputer/subsurface mirrored those corrupt dive profiles because it suggested the sample data was corrupt or that we were at least as capable as Analyst.
Well, we're certainly not going to re-implement the bugs in the Analyst application :-)
Jef
On Wed, Jan 21, 2015 at 10:52 AM, Jef Driesen jef@libdivecomputer.org wrote:
On 16-01-15 18:14, John Van Ostrand wrote:
Is the application going to expect to see dives from oldest to newest?
Yes! This is very important for the "download only new dives" feature. If the application receives the dives in newest first order, it can stop the download as soon as it recognizes a dive that it already has. Libdivecomputer's internal implementation (using the fingerprint feature) also requires this.
That's what I expected. So I'll take your "process dives from the oldest to the newest one (which you already do), and sum the profile length." to be misstated. I'll process from newest to oldest to determine which dives have valid profile data and I'll continue to present dives newest to oldest.
That reminds me of a somewhat related issue. Currently the entire profile ringbuffer (or at least the part that contains dives) gets downloaded with a single large request. For applications not using libdivecomputer' built-in fingerprint feature, that means we will first download ALL dives, and then throw away the ones the application has already downloaded before. That's not really optimal. Especially because downloading only the new dives happens to be our most common use-case.
The logic downloads only the sample data from the fingerprint dive forward, addresses are rounded to 16KB boundaries so that's the only waste. I'll experiment with more accurate addresses.
This could be improved by downloading the profile data with multiple
smaller requests. For example one request per dive profile (or two request if the dive happens to cross the ringbuffer end). I believe this is technically possible with the Cochran protocol. Multiple request will probably add some extra overhead when doing a complete download. But I doubt that outweighs the benefits in the normal case of downloading only a few dives.
With multiple smaller requests it's also possible to cancel a download. So that's an extra advantage that's nearly impossible to implement correctly with just one large request.
Downloading sample data is more complicated than downloading id, config, and misc data. First an 800ms delay seems to be needed. Maybe this can be whittled down, I'm sure I tried already though. Then we wait to see a heartbeat byte issued every second. If we don't wait the DC will ignore our command. So that's an average of 500ms. Then the command is sent and we switch to a high baud rate. The DC takes 450ms to process the request. Round it all up a bit and it's about 1.8 seconds per request. Then it's about 250ms per 4K of data.
With that in mind let's think about some scenarios:
1. One dive to download will take the same time either way. 2. Ten dives (say 100KB of samples) would take 24 seconds (18 seconds of overhead and 6 seconds of data transfer) done one dive at a time verses 8 seconds all at once. 3. One hundred dives (1MB of samples) would take 247 seconds (180 seconds of overhead and 57 seconds of data transfer,) verses 59 seconds all at once.
That's a lot of overhead, 3 to 4 times longer for dive-by-dive download. If we ignore that 800ms, the times are 16 vs 7 seconds or 157 vs 60, it's still double.
Rather than assuming a value that might be too large, I would start with a
relative small value and increase whenever there is evidence that there is indeed more memory. Since the last memory area contains the profile ringbuffer, this evidence will most likely be a ringbuffer pointer that points past our assumed end. That's something that's very easy to check.
Certainly it's easier if we know where memory starts and ends and I may be close to figuring that out. I've programmed this with a few ideals in mind. The first is to not dump a dive just because we aren't absolutely sure of the data. I argue this because I assume most end users of libdivecomputer (e.g. subsurface users) are not relying on the data to validate diving algorithms, they want to see a graphical representation of their dive. If they are like me they want to see most of their dive even if the last few samples are wrong. This explains why I'd like to see an attempt to retrieve corrupt dives and dives that wrap the sample buffer. In my experience it shows useful data. I'll get to my other ideals later.
We're not dropping any dives, only informing the caller that we did encounter data that we (currently) can't handle correctly. That's something completely different!
Dropping dive profile data is what I meant. In all the cases I've seen the data is entirely recoverable but we simply don't stop early enough and we see 20 seconds of bad data at the end of a dive. The user can delete or ignore the dive if they want.
Relying on guessing is always error-prone and unreliable. Just one wrong
sample is enough to render the recovery of a corrupted dive completely useless. For example if there is one depth sample completely out of range, then the application will scale the profile accordingly. The result is that the good part of the profile will be squeezed completely, and you won't be able to see anything useful at all. And things can get much worse too (see examples below).
These are educated guesses based on empirical data. I have a dozen or so corrupt dives that do recover reasonably well. This is presumption not assumption that I'm working on.
The profiles I've recovered look acceptable in Subsurface and i like that I can see my dive profile and I know that spike at the end can be ignored.
[...]
I can understand the point of view that we should fail when we are unsure of data and then expect the end user to file a bug report. This would likely get earlier reports of errors but in the interim many new users might be turned away when the software fails to work for them.
If you ask me, silently ignoring errors is an order of magnitude worse than gracefully failing with an error. Remember that if we ignore the error and try to proceed anyway, there is absolutely no guarantee that the result is correct. If you're lucky then the result is completely bogus and the user will notice immediately (e.g. unrealistic profiles). Smaller mistakes may go unnoticed at first, but if you realize after a while it might be already too late for re-downloading. And last but not least there is also the possibility of crashing the application.
We are still talking about corrupt dives aren't we? Technically speaking, given that we don't have the full specifications of dive computers it's really not guaranteed that any data we parse is going to be correct. I'd say that corrupt data might have less of a guarantee but I've had very good success recovering dive profiles.
Preventing a crash because of random data can be done if we are careful.
I can give plenty examples of real life scenarios that I encountered during libdivecomputer development. Two examples: If a ringbuffer pointer is out of range, and you try to use it anyway with code that expects a pointer in a certain range, then you'll likely end up with a buffer overflow and crash the application. When parsing "random" data, you can easily end up with depths (or times) that are way beyond the normal range (e.g. order of several kilometres). Definitely a challenge for the decompression algorithm in the application. (Hint: I ran out of patience and just killed it.)
Absolutely, checking pointers is a good idea.
Anyway, my experience is that being very strict pays off in the long run. For the end-user, it may be a bit annoying initially, but it's certainly not making things worse.
Note that I have the impression we're mixing two unrelated issues in this discussion: (1) problems associated with the ringbuffer end, and (2) recovery of corrupt dives.
For issue #1, this is all about finding the correct ringbuffer begin/end address. Once we have those values, the problem is solved and there is absolutely no need for any guessing. That's why I insist on being very strict there. It will help us to find the correct values faster.
No argument there. I completely agree. It's why I raised it as an issue.
For issue #2 the situation is very different. Because we're dealing with corrupt data here, this will not be as easy as finding some magic values. Maybe it's indeed possible to recover some data without too much serious issues. In that case I'm fine doing that, but if not I lean towards sticking to the logbook header only and dropping the partial profile. But let's first see how far we get.
We are dealing with missing pointers, not corrupt data. All we need to do is better locate the profile end pointer.
But remember that even if libdivecomputer itself doesn't use this
information nor provides an api to parse it, there might be someone out there who is interested in the raw data. It's perfectly possible to build a cochran specific application that use libdivecomputer only for downloading the data, and does the parsing on its own. Probably not a very likely scenario, but not impossible. Btw, that's also the reason why libdivecomputer always provides access to the raw data!
Note that if we now strip this info, and change our minds later one, then it might be too late. Changing the data format will break backwards compatibility.
I agree it would be nice to provide this data via API. If we decide to provide this data via the existing API it's going to mess up a lot of existing applications. The inter-dive events may not be so bad, since there are usually only a few events, keep in mind some dives have a lot. Also I don't know how to parse data from these events.
If we consider the inter-dive events to be part of the dive, and include it in our raw dive data, that doesn't mean the parser has to do anything with it. The parser can simply ignore it if it doesn't fit well into the api. But it's still there, in case an applications wants to look at the raw binary data.
Just to make myself clear: For the inter-dive events I'm not sure whether we should include it or not. The libdivecomputer api is based around the concept of dives. Inter-dive events don't really fit well into that model. They are not really part of the dive that comes before or after it. We don't have any way to deliver non-dive data to the application either. So the only options I see at the moment, are to include it as a pre-dive data, or to discard it. Currently my preference is more towards the latter option, but at this point I want to leave all options open.
There are two types of inter-dive data, inter-dive events which appear before most dives. These are things like off, on, configuration changes, connect to Analyst, altitude/temp changes, etc. These are events but they aren't dive events and the time stamps on them is going to skew the profile that's why they shouldn't be included.
What I meant was that it would be nice to provide this data but it would have to be done through an extended API.
The off-gassing data, if sent out through the samples_foreach function are
going to be incorporated into the aplications' dive profiles and appear as flat trailing lines in the profile. Temperature might be the only interesting sample and barely so.
I doubt this is off-gassing data. It's an artifact of how dive computers determine the end of a dive. They typically consider a dive after spending X amount of time above a certain depth threshold Y. During this period they keep recording because you might descend again and resume the dive. When the dive is really over, some dive computers keep this tail intact, others rewind and discard it. It looks like the cochran has some sort of compromise: It leaves the tail intact, but also indicates at which point it considered the dive finished.
I would definitely include this data. Discarding the tail should be done by the application, where it can be configured by the user. (I believe subsurface already support this.) Some users will want to see it, and others don't.
This is the second type of inter-dive data, samples.
I call it "off-gassing" because I was told by tech support that the computer stays on calculating off-gassing after a dive. After seeing the data in detail I can see that was misstated.
Plotted, the data is a flat line at surface with temp changes and it lasts 10 to 20 minutes, which would plot as a significantly large flat tail at the end of most dives.
Already other interesting data from the Cochran data stream is not
provided through the API. Data like deepest stop time and tissue compartment loads are not accessible. To make off-gassing samples meaningful we should also present the in-dive TC loads and then we should indicate to the application that this is inter-dive data.
The libdivecomputer api supports deco/ndl info, but not tissue loadings. The latter are too device specific. To be able to interpret tissue loadings, you need to know the decompression algorithm and all its parameter. That's info we simply don't have (and likely never will). Implementing a decompression algorithm in the application makes a lot more sense.
The only explanation that I can come up with is that the dive computer
started writing dive #95. Then it failed before the dive was finished (e.g. battery empty, reboot or whatever) and the end pointer didn't get written correctly. Hence the ffffffff value. But the end-of-profile pointer is probably only updated after the dive is finished. Hence the pre-dive pointer of the next dive retains the same value. I suspect that the current write position is somehow preserved and thus the next dive will start from there again. But this is of course just guessing.
That's what I think too. given that flash can be slow and have wears
with writes I expect it's logical to eliminate unnecessary writes. Based on what I've seen the Cochran code writes the first half of the log when the dive starts (e.g. deeper than 6ft). There are two end-points to a dive. The first is when the diver ascends to 3ft. I think at this point Cochran updates the sample-end-pointer in config0. Cochran then continues logging until the configured "Post dive surface interval minutes" (10 - 20 minutes) elapses. This is why you see 1800 bytes (10 min * 60 seconds * 3 samples/sec) between dive-end-pointer and the pre-dive-pointer. At any time prior to this elapsing a submersion to below 6 ft continues the dive. I think this is why these post-dive samples exist. After the elapsed interval, the end-dive log is written and now the pre-dive-pointer is known and can be updated in config0.
So although the pre-dive pointer is bogus on the dive after a corrupt dive, the dive sample data of the corrupt dive is intact but determining the end has been difficult. Now that I've thought through the Cochran logic, I should go over my logic to see if I can better determine the end of a corrupt dive.
Makes sense.
Excellent info about that 1800 (emc) or 1201 (commander) bytes block. Initially I assumed this was some fixed size trailer, and I had no explanation for the fact that on some dives it's a few bytes larger. But you are right. It's indeed the tail of the dive profile, and those few extra bytes are events. More pieces start to fall into place!
I understand why you're trying to salvage those corrupt dives (in the
sense that having something is better than nothing at all), but I worry that this will complicate the code or produce bogus data (which is even worse). When I look at some of those salvaged dives, that seems to be the case already. They end abrupt, which is probably normal. But there appears to be some amount of "random" data (e.g. spikes in the profile) too. And that's not good. Maybe this is just a bug in the parsing that we can fix? I simply don't know at this point.
The code ins't too complicated because of it. A corrupt dive is known because of the FFFFFFFF dive-end-pointer. Knowing that we loop, exiting on a memory boundary, when we see a sample that doesn't make sense (0xFF), when we see normal end-dive events or normal start-dive events. If we don't find end-dive events and we stop because of start-dive events we may have processed some inter-dive events as sample data and I think that's why we see the spikes at the end. It's a few lines of code in the parse function plus the guess function and a few if-thens. Since corrupt dives don't have end-dive log data the code parses the samples for depth, temp and duration, etc.
Now that I've walked through the cochran logic and why it's writing sample pointers at certain times I might be able to do better guessing at end-dives.
Well the code is indeed not really complicated, but it's also not without flaws. Those spikes at the end are examples of that. Fixing those issues might turn out harder than expected. That's my main concern.
Backtracking through inter-dive events should remove that spike and it should be reasonably reliable. At worst it would still remove all events and truncate a few seconds of profile. That's probably better than having a spike.
I suggest we first take care of the non-corrupt dives, and get the backend into a state where we can integrate it into libdivecomputer. Once we have that, we take a step back and try to come up with a solution to deal with those corrupt dives too. What do you think?
It's been 8 months that I've been working on this, I'd like to see it included. The changes you've asked for so far are quite intensive and are going to require a significant amount of work. I'm going to keep the corrupt profile recovery in mind since Im so close on that too.
What does the Cochran application do with those corrupt dives? Does it
show them? With or without a profile?
Cochran refuses to display a corrupt dive.
So if we're not able to recover corrupt dives, we're at least not doing worse then Analyst. I think that's already a good starting point. If we manage to do better then great.
The Analyst software is pretty crappy Windows 3.1, non-standard-UI software that almost always crashes before I can exit it. Doing better than Analyst isn't going to be hard. I should also mention that behind that crappy, crashy interface is quite a bit of functionality, but it doesn't feel well programmed at all.
You also mention that the
Commander shows some erratic dives, although in the context of reclaimed and overwritten sample data. There were a lot of Commander dives that looked corrupt in Analyst. I was happy that libdivecomputer/subsurface mirrored those corrupt dive profiles because it suggested the sample data was corrupt or that we were at least as capable as Analyst.
Well, we're certainly not going to re-implement the bugs in the Analyst application :-)
I'm not suggesting we do, it's just the first bar to pass.