Initial commit, stdint, added comments
This commit is contained in:
154
src/atom.c
Normal file
154
src/atom.c
Normal file
@@ -0,0 +1,154 @@
|
||||
#include "../include/atom.h"
|
||||
#include "../include/mem.h"
|
||||
#include "../include/except.h"
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
static struct Atom {
|
||||
struct Atom *link; // Pointer to next atom (used for collisions in hash table)
|
||||
int64_t len; // Length of the string
|
||||
char *str; // The actual string data
|
||||
} *buckets[2048]; // Array of 2048 atom "buckets" (used for the hash table)
|
||||
|
||||
// Predefined scatter values, used for generating hash values.
|
||||
static uint32_t scatter[] = {
|
||||
2078917053, 143302914, 1027100827, 1953210302, 755253631, 2002600785,
|
||||
1405390230, 45248011, 1099951567, 433832350, 2018585307, 438263339,
|
||||
813528929, 1703199216, 618906479, 573714703, 766270699, 275680090,
|
||||
1510320440, 1583583926, 1723401032, 1965443329, 1098183682, 1636505764,
|
||||
980071615, 1011597961, 643279273, 1315461275, 157584038, 1069844923,
|
||||
471560540, 89017443, 1213147837, 1498661368, 2042227746, 1968401469,
|
||||
1353778505, 1300134328, 2013649480, 306246424, 1733966678, 1884751139,
|
||||
744509763, 400011959, 1440466707, 1363416242, 973726663, 59253759,
|
||||
1639096332, 336563455, 1642837685, 1215013716, 154523136, 593537720,
|
||||
704035832, 1134594751, 1605135681, 1347315106, 302572379, 1762719719,
|
||||
269676381, 774132919, 1851737163, 1482824219, 125310639, 1746481261,
|
||||
1303742040, 1479089144, 899131941, 1169907872, 1785335569, 485614972,
|
||||
907175364, 382361684, 885626931, 200158423, 1745777927, 1859353594,
|
||||
259412182, 1237390611, 48433401, 1902249868, 304920680, 202956538,
|
||||
348303940, 1008956512, 1337551289, 1953439621, 208787970, 1640123668,
|
||||
1568675693, 478464352, 266772940, 1272929208, 1961288571, 392083579,
|
||||
871926821, 1117546963, 1871172724, 1771058762, 139971187, 1509024645,
|
||||
109190086, 1047146551, 1891386329, 994817018, 1247304975, 1489680608,
|
||||
706686964, 1506717157, 579587572, 755120366, 1261483377, 884508252,
|
||||
958076904, 1609787317, 1893464764, 148144545, 1415743291, 2102252735,
|
||||
1788268214, 836935336, 433233439, 2055041154, 2109864544, 247038362,
|
||||
299641085, 834307717, 1364585325, 23330161, 457882831, 1504556512,
|
||||
1532354806, 567072918, 404219416, 1276257488, 1561889936, 1651524391,
|
||||
618454448, 121093252, 1010757900, 1198042020, 876213618, 124757630,
|
||||
2082550272, 1834290522, 1734544947, 1828531389, 1982435068, 1002804590,
|
||||
1783300476, 1623219634, 1839739926, 69050267, 1530777140, 1802120822,
|
||||
316088629, 1830418225, 488944891, 1680673954, 1853748387, 946827723,
|
||||
1037746818, 1238619545, 1513900641, 1441966234, 367393385, 928306929,
|
||||
946006977, 985847834, 1049400181, 1956764878, 36406206, 1925613800,
|
||||
2081522508, 2118956479, 1612420674, 1668583807, 1800004220, 1447372094,
|
||||
523904750, 1435821048, 923108080, 216161028, 1504871315, 306401572,
|
||||
2018281851, 1820959944, 2136819798, 359743094, 1354150250, 1843084537,
|
||||
1306570817, 244413420, 934220434, 672987810, 1686379655, 1301613820,
|
||||
1601294739, 484902984, 139978006, 503211273, 294184214, 176384212,
|
||||
281341425, 228223074, 147857043, 1893762099, 1896806882, 1947861263,
|
||||
1193650546, 273227984, 1236198663, 2116758626, 489389012, 593586330,
|
||||
275676551, 360187215, 267062626, 265012701, 719930310, 1621212876,
|
||||
2108097238, 2026501127, 1865626297, 894834024, 552005290, 1404522304,
|
||||
48964196, 5816381, 1889425288, 188942202, 509027654, 36125855,
|
||||
365326415, 790369079, 264348929, 513183458, 536647531, 13672163,
|
||||
313561074, 1730298077, 286900147, 1549759737, 1699573055, 776289160,
|
||||
2143346068, 1975249606, 1136476375, 262925046, 92778659, 1856406685,
|
||||
1884137923, 53392249, 1735424165, 1602280572
|
||||
};
|
||||
|
||||
// Function to get the atom corresponding to a string
|
||||
const char *atom_string(const char *str) {
|
||||
assert(str); // Ensure the string is not NULL
|
||||
return atom_new(str, strlen(str)); // Call atom_new to retrieve the atom
|
||||
}
|
||||
|
||||
// Function to convert an integer to an atom (string representation of the number)
|
||||
const char *atom_int(int64_t n) {
|
||||
char str[43];
|
||||
char *s = str + sizeof str;
|
||||
size_t m; // Used to hold the absolute value of the number
|
||||
|
||||
// Handle the special case of the minimum long value (LONG_MIN) which cannot be negated
|
||||
if (n == LONG_MIN)
|
||||
m = LONG_MAX + 1UL; // Adjust for LONG_MIN
|
||||
else if (n < 0)
|
||||
m = -n; // Take the absolute value if the number is negative
|
||||
else
|
||||
m = n; // Use the number directly if it is positive
|
||||
|
||||
// Convert the number to a string by repeatedly taking modulus 10 (for digits)
|
||||
do
|
||||
*--s = m % 10 + '0'; // Store the digit as a character
|
||||
while ((m /= 10) > 0); // Divide by 10 until no digits are left
|
||||
|
||||
// If the number is negative, add the '-' sign at the front
|
||||
if (n < 0)
|
||||
*--s = '-';
|
||||
|
||||
// Return the atom corresponding to the string
|
||||
return atom_new(s, (str + sizeof str) - s);
|
||||
}
|
||||
|
||||
// Function to create or retrieve an atom corresponding to a string with a given length
|
||||
const char *atom_new(const char *str, int64_t len) {
|
||||
uint32_t h; // hash value of the string
|
||||
int i;
|
||||
struct Atom *p;
|
||||
|
||||
assert(str); // Ensure the string is not NULL
|
||||
assert(len >= 0); // Ensure the length is non-negative
|
||||
|
||||
// Compute the hash value for the string using scatter.
|
||||
for (h = 0, i = 0; i < len; i++)
|
||||
h = (h << 1) + scatter[(unsigned char)str[i]]; // Use the scatter array for hash generation
|
||||
|
||||
h &= MEM_COUNT(buckets) - 1; // Modulo the hash by the number of buckets to ensure it fits
|
||||
|
||||
// Search for an existing atom with the same string and length (to avoid duplicates)
|
||||
for (p = buckets[h]; p; p = p->link)
|
||||
if (len == p->len) {
|
||||
// If the atom has the same length, compare each character to check for equality
|
||||
for (i = 0; i < len && p->str[i] == str[i]; )
|
||||
i++;
|
||||
// If the strings match, return the existing atom's string
|
||||
if (i == len)
|
||||
return p->str;
|
||||
}
|
||||
|
||||
// If no matching atom was found, allocate a new one
|
||||
p = ALLOC(sizeof (*p) + len + 1); // Allocate memory for the new atom
|
||||
p->len = len; // Set the length of the string
|
||||
p->str = (char *)(p + 1); // The string follows the atom structure in memory
|
||||
|
||||
// Copy the string data into the newly allocated atom (if the length is greater than 0)
|
||||
if (len > 0)
|
||||
memcpy(p->str, str, len);
|
||||
|
||||
p->str[len] = '\0'; // Null-terminate the string
|
||||
|
||||
// Insert the new atom into the corresponding hash bucket
|
||||
p->link = buckets[h];
|
||||
buckets[h] = p;
|
||||
|
||||
// Return the string of the new atom
|
||||
return p->str;
|
||||
}
|
||||
|
||||
// Function to get the length of an atom (string)
|
||||
int atom_length(const char *str) {
|
||||
struct Atom *p;
|
||||
int i;
|
||||
|
||||
assert(str); // Ensure the string is not NULL
|
||||
|
||||
// Search through all the atom buckets to find the atom corresponding to the string
|
||||
for (i = 0; i < MEM_COUNT(buckets); i++)
|
||||
for (p = buckets[i]; p; p = p->link)
|
||||
if (p->str == str) // If the string matches, return the length
|
||||
return p->len;
|
||||
|
||||
// If the atom was not found, there's an error in the program
|
||||
assert(0);
|
||||
return 0; // This line is unreachable, just to avoid a compiler warning
|
||||
}
|
||||
Reference in New Issue
Block a user