Add support for use of the system timezone database, rather than embedding a copy. Discussed upstream but was not desired. History: r7: per Sean Finney's review: simpler lat/long rounding, use stat() not access() to check existence of timezone, improve comments throughout. r6: fix fd leak in r5, fix country code/BC flag use in timezone_identifiers_list() using system db, fix use of PECL timezonedb to override system db, r5: reverts addition of "System/Localtime" fake tzname. updated for 5.3.0, parses zone.tab to pick up mapping between timezone name, country code and long/lat coords r4: added "System/Localtime" tzname which uses /etc/localtime r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert) r2: add filesystem trawl to set up name alias index r1: initial revision --- a/ext/date/lib/parse_tz.c +++ b/ext/date/lib/parse_tz.c @@ -20,6 +20,16 @@ #include "timelib.h" +#ifdef HAVE_SYSTEM_TZDATA +#include +#include +#include +#include +#include + +#include "php_scandir.h" +#endif + #include #ifdef HAVE_LOCALE_H @@ -31,7 +41,12 @@ #else #include #endif + +#ifndef HAVE_SYSTEM_TZDATA #include "timezonedb.h" +#endif + +#include #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__)) # if defined(__LITTLE_ENDIAN__) @@ -51,6 +66,11 @@ static void read_preamble(const unsigned char **tzf, timelib_tzinfo *tz) { + if (memcmp(tzf, "TZif", 4) == 0) { + *tzf += 20; + return; + } + /* skip ID */ *tzf += 4; @@ -256,7 +276,435 @@ void timelib_dump_tzinfo(timelib_tzinfo } } -static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb) +#ifdef HAVE_SYSTEM_TZDATA + +#ifdef HAVE_SYSTEM_TZDATA_PREFIX +#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX +#else +#define ZONEINFO_PREFIX "/usr/share/zoneinfo" +#endif + +/* Hash table entry for the cache of the zone.tab mapping table. */ +struct location_info { + char code[2]; /* Country code. */ + double latitude, longitude; + char name[64]; + char *comment; + struct location_info *next; +}; + +/* System timezone database pointer. */ +static const timelib_tzdb *timezonedb_system = NULL; + +/* Cache of zone.tab location data. */ +static struct location_info **system_location_table; + +/* Size of the zone.tab hash table; a random-ish prime big enough to + * prevent too many collisions. */ +#define LOCINFO_HASH_SIZE (1021) + +/* Hash function for indexing the location_info hash table. */ +static uint32_t tz_hash(const char *str) +{ + const unsigned char *p = (const unsigned char *)str; + uint32_t hash = 5381; + int c; + + while ((c = *p++) != '\0') { + hash = (hash << 5) ^ hash ^ c; + } + + return hash % LOCINFO_HASH_SIZE; +} + +/* Parse an ISO-6709 co-ordinate as used in zone.tab. Returns end of + * the parsed string on success, or NULL on parse error. On success, + * writes the parsed number to *result. */ +static char *parse_iso6709(char *p, double *result) +{ + double v, sign; + char *pend; + size_t len; + + if (*p == '+') + sign = 1.0; + else if (*p == '-') + sign = -1.0; + else + return NULL; + + p++; + for (pend = p; *pend >= '0' && *pend <= '9'; pend++) + ;; + + /* Annoying encoding used by zone.tab has no decimal point, so use + * the length to determine the format: + * + * 4 = DDMM + * 5 = DDDMM + * 6 = DDMMSS + * 7 = DDDMMSS + */ + len = pend - p; + if (len < 4 || len > 7) { + return NULL; + } + + /* p => [D]DD */ + v = (p[0] - '0') * 10.0 + (p[1] - '0'); + p += 2; + if (len == 5 || len == 7) + v = v * 10.0 + (*p++ - '0'); + /* p => MM[SS] */ + v += (10.0 * (p[0] - '0') + + p[1] - '0') / 60.0; + p += 2; + /* p => [SS] */ + if (len > 5) { + v += (10.0 * (p[0] - '0') + + p[1] - '0') / 3600.0; + p += 2; + } + + /* Round to five decimal place, not because it's a good idea, + * but, because the builtin data uses rounded data, so, match + * that. */ + *result = sign * (int)(v * 100000.0 + 0.5) / 100000.0; + + return p; +} + +/* This function parses the zone.tab file to build up the mapping of + * timezone to country code and geographic location, and returns a + * hash table. The hash table is indexed by the function: + * + * tz_hash(timezone-name) + */ +static struct location_info **create_location_table(void) +{ + struct location_info **li, *i; + char zone_tab[PATH_MAX]; + char line[512]; + FILE *fp; + + strncpy(zone_tab, ZONEINFO_PREFIX "/zone.tab", sizeof zone_tab); + + fp = fopen(zone_tab, "r"); + if (!fp) { + return NULL; + } + + li = calloc(LOCINFO_HASH_SIZE, sizeof *li); + + while (fgets(line, sizeof line, fp)) { + char *p = line, *code, *name, *comment; + uint32_t hash; + double latitude, longitude; + + while (isspace(*p)) + p++; + + if (*p == '#' || *p == '\0' || *p == '\n') + continue; + + if (!isalpha(p[0]) || !isalpha(p[1]) || p[2] != '\t') + continue; + + /* code => AA */ + code = p; + p[2] = 0; + p += 3; + + /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */ + p = parse_iso6709(p, &latitude); + if (!p) { + continue; + } + p = parse_iso6709(p, &longitude); + if (!p) { + continue; + } + + if (!p || *p != '\t') { + continue; + } + + /* name = string */ + name = ++p; + while (*p != '\t' && *p && *p != '\n') + p++; + + *p++ = '\0'; + + /* comment = string */ + comment = p; + while (*p != '\t' && *p && *p != '\n') + p++; + + if (*p == '\n' || *p == '\t') + *p = '\0'; + + hash = tz_hash(name); + i = malloc(sizeof *i); + memcpy(i->code, code, 2); + strncpy(i->name, name, sizeof i->name); + i->comment = strdup(comment); + i->longitude = longitude; + i->latitude = latitude; + i->next = li[hash]; + li[hash] = i; + /* printf("%s [%u, %f, %f]\n", name, hash, latitude, longitude); */ + } + + fclose(fp); + + return li; +} + +/* Return location info from hash table, using given timezone name. + * Returns NULL if the name could not be found. */ +const struct location_info *find_zone_info(struct location_info **li, + const char *name) +{ + uint32_t hash = tz_hash(name); + const struct location_info *l; + + if (!li) { + return NULL; + } + + for (l = li[hash]; l; l = l->next) { + if (strcasecmp(l->name, name) == 0) + return l; + } + + return NULL; +} + +/* Filter out some non-tzdata files and the posix/right databases, if + * present. */ +static int index_filter(const struct dirent *ent) +{ + return strcmp(ent->d_name, ".") != 0 + && strcmp(ent->d_name, "..") != 0 + && strcmp(ent->d_name, "posix") != 0 + && strcmp(ent->d_name, "posixrules") != 0 + && strcmp(ent->d_name, "right") != 0 + && strstr(ent->d_name, ".tab") == NULL; +} + +/* Comparison callback for qsort(), used to alpha-sort the index + * array by timezone name. */ +static int sysdbcmp(const void *first, const void *second) +{ + const timelib_tzdb_index_entry *alpha = first, *beta = second; + + return strcmp(alpha->id, beta->id); +} + + +/* Create the zone identifier index by trawling the filesystem. */ +static void create_zone_index(timelib_tzdb *db) +{ + size_t dirstack_size, dirstack_top; + size_t index_size, index_next; + timelib_tzdb_index_entry *db_index; + char **dirstack; + + /* LIFO stack to hold directory entries to scan; each slot is a + * directory name relative to the zoneinfo prefix. */ + dirstack_size = 32; + dirstack = malloc(dirstack_size * sizeof *dirstack); + dirstack_top = 1; + dirstack[0] = strdup(""); + + /* Index array. */ + index_size = 64; + db_index = malloc(index_size * sizeof *db_index); + index_next = 0; + + do { + struct dirent **ents; + char name[PATH_MAX], *top; + int count; + + /* Pop the top stack entry, and iterate through its contents. */ + top = dirstack[--dirstack_top]; + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top); + + count = php_scandir(name, &ents, index_filter, php_alphasort); + + while (count > 0) { + struct stat st; + const char *leaf = ents[count - 1]->d_name; + + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s", + top, leaf); + + if (strlen(name) && stat(name, &st) == 0) { + /* Name, relative to the zoneinfo prefix. */ + const char *root = top; + + if (root[0] == '/') root++; + + snprintf(name, sizeof name, "%s%s%s", root, + *root ? "/": "", leaf); + + if (S_ISDIR(st.st_mode)) { + if (dirstack_top == dirstack_size) { + dirstack_size *= 2; + dirstack = realloc(dirstack, + dirstack_size * sizeof *dirstack); + } + dirstack[dirstack_top++] = strdup(name); + } + else { + if (index_next == index_size) { + index_size *= 2; + db_index = realloc(db_index, + index_size * sizeof *db_index); + } + + db_index[index_next++].id = strdup(name); + } + } + + free(ents[--count]); + } + + if (count != -1) free(ents); + free(top); + } while (dirstack_top); + + /* Alpha-sort the index array; shouldn't be technically necessary + * but some of the test cases rely on this, and, it matches the + * builtin database. */ + qsort(db_index, index_next, sizeof *db_index, sysdbcmp); + + db->index = db_index; + db->index_size = index_next; + + free(dirstack); +} + +#define FAKE_HEADER "1234\0??\1??" +#define FAKE_BC_POS (0) +#define FAKE_UTC_POS (7 - 4) + +/* Create a fake data segment for database 'sysdb'. This mocks + * up a fake ->data segment for the given timezone database. + * php_date.c::timezone_identifiers_list() looks at data[pos + 4] + * through data[pos + 6] to compare the country code and BC flag, + * which are stored in the builtin data array like: + * + * (pos + 4) => BC flag + * (pos + 5, pos + 6) => Two chars of country code + * + * where pos is the index corresponding to the timezone name. + * + * Timezone names are classified here into three types: + * 1) UTC, which is special + * 2) "normal" zone names + * 3) "backwards-compat" zone names + * + * (boolean logic of the BC flag seems to be inverted, but hey) + * + * UTC is special since it has BC=\1, code = "??" + * "normal" zones exist in zone.tab and have the given c-code and BC=\1 + * "backwards-compat" zones don't exist in zone.tab and have BC=\0 + * + * Since UTC and the BC zones are constant, they are encoded in the + * FAKE_HEADER prefix, and pos pointers index into that. + * + * FAKE_HEADER is hence four random bytes, then the BC zone segment + * (three bytes), then the UTC zone segment (another three). + * + * For all "normal" zones, three bytes are appended to the data array; + * the BC flag, always 1, and the two bytes of country code. + */ +static void fake_data_segment(timelib_tzdb *sysdb, + struct location_info **info) +{ + size_t n; + char *data, *p; + + /* Worst case maximum is 3 bytes per zone, plus the header. */ + data = malloc((3 * sysdb->index_size) + sizeof(FAKE_HEADER) - 1); + + /* Append the fake header, p then = next byte */ + p = mempcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1); + + for (n = 0; n < sysdb->index_size; n++) { + const struct location_info *li; + timelib_tzdb_index_entry *ent; + + /* Lost const'ness since we're modifying the pos pointer. */ + ent = (timelib_tzdb_index_entry *)&sysdb->index[n]; + + /* Lookup the timezone name in the hash table. */ + if (strcmp(ent->id, "UTC") == 0) { + ent->pos = FAKE_UTC_POS; + continue; + } + + li = find_zone_info(info, ent->id); + if (li) { + /* If found, append the BC byte and the country code; set + * the position index for the timezone to point to + * this. */ + ent->pos = (p - data) - 4; + *p++ = '\x01'; + *p++ = li->code[0]; + *p++ = li->code[1]; + } + else { + /* If not found, the timezone data can + * point at the header. */ + ent->pos = 0; + } + } + + /* Store the fake data array */ + sysdb->data = (unsigned char *)data; +} + +/* Evaluates to true if given timezone name is valid. */ +#define is_valid_tz_name(tz_) (tz_[0] && strstr(tz_, "..") == NULL) + +/* Return the mmap()ed tzfile if found, else NULL. On success, the + * length of the mapped data is placed in *length. */ +static char *map_tzfile(const char *timezone, size_t *length) +{ + char fname[PATH_MAX]; + struct stat st; + char *p; + int fd; + + if (!is_valid_tz_name(timezone)) { + return NULL; + } + + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone); + + fd = open(fname, O_RDONLY); + if (fd == -1) { + return NULL; + } else if (fstat(fd, &st) != 0 || st.st_size < 21) { + close(fd); + return NULL; + } + + *length = st.st_size; + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + close(fd); + + return p != MAP_FAILED ? p : NULL; +} +#endif + +/* seek_to_tz_position() for a builtin/external database. */ +static int inmem_seek_to_tz_position(const unsigned char **tzf, + char *timezone, const timelib_tzdb *tzdb) { int left = 0, right = tzdb->index_size - 1; #ifdef HAVE_SETLOCALE @@ -295,36 +743,131 @@ static int seek_to_tz_position(const uns return 0; } +/* Modified seek_to_tz_position wrapper which handles the system + * database and the builtin/external databases in the same way. + * Returns zero on failure on non-zero on success. On success, (*map, + * *maplen) is an mmap'ed region if *map is non-NULL, and must be + * munmaped after use. */ +static int seek_to_tz_position(const unsigned char **tzf, char *timezone, + char **map, size_t *maplen, + const timelib_tzdb *tzdb) +{ +#ifdef HAVE_SYSTEM_TZDATA + if (tzdb == timezonedb_system) { + char *orig; + + orig = map_tzfile(timezone, maplen); + if (orig == NULL) { + return 0; + } + + (*tzf) = (unsigned char *)orig ; + *map = orig; + + return 1; + } + else +#endif + { + return inmem_seek_to_tz_position(tzf, timezone, tzdb); + } +} + const timelib_tzdb *timelib_builtin_db(void) { +#ifdef HAVE_SYSTEM_TZDATA + if (timezonedb_system == NULL) { + timelib_tzdb *tmp = malloc(sizeof *tmp); + + tmp->version = "0.system"; + tmp->data = NULL; + create_zone_index(tmp); + system_location_table = create_location_table(); + fake_data_segment(tmp, system_location_table); + timezonedb_system = tmp; + } + + return timezonedb_system; +#else return &timezonedb_builtin; +#endif } const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count) { +#ifdef HAVE_SYSTEM_TZDATA + *count = timezonedb_system->index_size; + return timezonedb_system->index; +#else *count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin); return timezonedb_idx_builtin; +#endif } int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb) { const unsigned char *tzf; - return (seek_to_tz_position(&tzf, timezone, tzdb)); + +#ifdef HAVE_SYSTEM_TZDATA + if (tzdb == timezonedb_system) { + char fname[PATH_MAX]; + struct stat st; + + if (!is_valid_tz_name(timezone)) { + return 0; + } + + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone); + + return stat(fname, &st) == 0 && S_ISREG(st.st_mode); + } +#endif + + return (inmem_seek_to_tz_position(&tzf, timezone, tzdb)); } timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb) { const unsigned char *tzf; + char *memmap = NULL; + size_t maplen; timelib_tzinfo *tmp; - if (seek_to_tz_position(&tzf, timezone, tzdb)) { + if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) { tmp = timelib_tzinfo_ctor(timezone); read_preamble(&tzf, tmp); read_header(&tzf, tmp); read_transistions(&tzf, tmp); read_types(&tzf, tmp); - read_location(&tzf, tmp); + +#ifdef HAVE_SYSTEM_TZDATA + if (memmap) { + const struct location_info *li; + + /* TZif-style - grok the location info from the system database, + * if possible. */ + if ((li = find_zone_info(system_location_table, timezone)) != NULL) { + tmp->location.comments = strdup(li->comment); + strncpy(tmp->location.country_code, li->code, 2); + tmp->location.longitude = li->longitude; + tmp->location.latitude = li->latitude; + tmp->bc = 1; + } + else { + strcpy(tmp->location.country_code, "??"); + tmp->bc = 0; + tmp->location.comments = strdup(""); + } + + /* Now done with the mmap segment - discard it. */ + munmap(memmap, maplen); + } else +#endif + { + /* PHP-style - use the embedded info. */ + read_location(&tzf, tmp); + } } else { tmp = NULL; } --- a/ext/date/lib/timelib.m4 +++ b/ext/date/lib/timelib.m4 @@ -78,3 +78,17 @@ stdlib.h dnl Check for strtoll, atoll AC_CHECK_FUNCS(strtoll atoll strftime) + +PHP_ARG_WITH(system-tzdata, for use of system timezone data, +[ --with-system-tzdata[=DIR] to specify use of system timezone data], +no, no) + +if test "$PHP_SYSTEM_TZDATA" != "no"; then + AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used]) + + if test "$PHP_SYSTEM_TZDATA" != "yes"; then + AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA", + [Define for location of system timezone data]) + fi +fi +