rockbox/apps/plugins/puzzles/src/matching.c
Franklin Wei b3356e3aff puzzles: resync with upstream
This brings the code to upstream commit 3ece3d6 (I've made my own Rockbox-
specific changes on top of that).

Changes include using C99 `bool' throughout, and minor logic fixes for some
puzzles.

Change-Id: Ie823e73ae49a8ee1de411d6d406df2ba835af541
2018-12-21 22:13:33 -05:00

753 lines
21 KiB
C

/*
* Implementation of matching.h.
*/
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include "puzzles.h"
#include "matching.h"
struct scratch {
/*
* Current contents of the in-progress matching. LtoR is an array
* of nl integers, each of which holds a value in {0,1,...,nr-1},
* or -1 for no current assignment. RtoL is exactly the reverse.
*
* Invariant: LtoR[i] is non-empty and equal to j if and only if
* RtoL[j] is non-empty and equal to i.
*/
int *LtoR, *RtoL;
/*
* Arrays of nl and nr integer respectively, giving the layer
* assigned to each integer in the breadth-first search step of
* the algorithm.
*/
int *Llayer, *Rlayer;
/*
* Arrays of nl and nr integers respectively, used to hold the
* to-do queues in the breadth-first search.
*/
int *Lqueue, *Rqueue;
/*
* An augmenting path of vertices, alternating between L vertices
* (in the even-numbered positions, starting at 0) and R (in the
* odd positions). Must be long enough to hold any such path that
* never repeats a vertex, i.e. must be at least 2*min(nl,nr) in
* size.
*/
int *augpath;
/*
* Track the progress of the depth-first search at each
* even-numbered layer. Has one element for each even-numbered
* position in augpath.
*/
int *dfsstate;
/*
* Store a random permutation of the L vertex indices, if we're
* randomising the dfs phase.
*/
int *Lorder;
};
size_t matching_scratch_size(int nl, int nr)
{
size_t n;
int nmin = (nl < nr ? nl : nr);
n = (sizeof(struct scratch) + sizeof(int)-1)/sizeof(int);
n += nl; /* LtoR */
n += nr; /* RtoL */
n += nl; /* Llayer */
n += nr; /* Rlayer */
n += nl; /* Lqueue */
n += nr; /* Rqueue */
n += 2*nmin; /* augpath */
n += nmin; /* dfsstate */
n += nl; /* Lorder */
return n * sizeof(int);
}
int matching_with_scratch(void *scratchv,
int nl, int nr, int **adjlists, int *adjsizes,
random_state *rs, int *outl, int *outr)
{
struct scratch *s = (struct scratch *)scratchv;
int L, R, i, j;
/*
* Set up the various array pointers in the scratch space.
*/
{
int *p = scratchv;
int nmin = (nl < nr ? nl : nr);
p += (sizeof(struct scratch) + sizeof(int)-1)/sizeof(int);
s->LtoR = p; p += nl;
s->RtoL = p; p += nr;
s->Llayer = p; p += nl;
s->Rlayer = p; p += nr;
s->Lqueue = p; p += nl;
s->Rqueue = p; p += nr;
s->augpath = p; p += 2*nmin;
s->dfsstate = p; p += nmin;
s->Lorder = p; p += nl;
}
/*
* Set up the initial matching, which is empty.
*/
for (L = 0; L < nl; L++)
s->LtoR[L] = -1;
for (R = 0; R < nr; R++)
s->RtoL[R] = -1;
while (1) {
/*
* Breadth-first search starting from the unassigned left
* vertices, traversing edges from left to right only if they
* are _not_ part of the matching, and from right to left only
* if they _are_. We assign a 'layer number' to all vertices
* visited by this search, with the starting vertices being
* layer 0 and every successor of a layer-n node being layer
* n+1.
*/
int Lqs, Rqs, layer, target_layer;
for (L = 0; L < nl; L++)
s->Llayer[L] = -1;
for (R = 0; R < nr; R++)
s->Rlayer[R] = -1;
Lqs = 0;
for (L = 0; L < nl; L++) {
if (s->LtoR[L] == -1) {
s->Llayer[L] = 0;
s->Lqueue[Lqs++] = L;
}
}
layer = 0;
while (1) {
bool found_free_R_vertex = false;
Rqs = 0;
for (i = 0; i < Lqs; i++) {
L = s->Lqueue[i];
assert(s->Llayer[L] == layer);
for (j = 0; j < adjsizes[L]; j++) {
R = adjlists[L][j];
if (R != s->LtoR[L] && s->Rlayer[R] == -1) {
s->Rlayer[R] = layer+1;
s->Rqueue[Rqs++] = R;
if (s->RtoL[R] == -1)
found_free_R_vertex = true;
}
}
}
layer++;
if (found_free_R_vertex)
break;
if (Rqs == 0)
goto done;
Lqs = 0;
for (j = 0; j < Rqs; j++) {
R = s->Rqueue[j];
assert(s->Rlayer[R] == layer);
if ((L = s->RtoL[R]) != -1 && s->Llayer[L] == -1) {
s->Llayer[L] = layer+1;
s->Lqueue[Lqs++] = L;
}
}
layer++;
if (Lqs == 0)
goto done;
}
target_layer = layer;
/*
* Vertices in the target layer are only interesting if
* they're actually unassigned. Blanking out the others here
* will save us a special case in the dfs loop below.
*/
for (R = 0; R < nr; R++)
if (s->Rlayer[R] == target_layer && s->RtoL[R] != -1)
s->Rlayer[R] = -1;
/*
* Choose an ordering in which to try the L vertices at the
* start of the next pass.
*/
for (L = 0; L < nl; L++)
s->Lorder[L] = L;
if (rs)
shuffle(s->Lorder, nl, sizeof(*s->Lorder), rs);
/*
* Now depth-first search through that layered set of vertices
* to find as many (vertex-)disjoint augmenting paths as we
* can, and for each one we find, augment the matching.
*/
s->dfsstate[0] = 0;
i = 0;
while (1) {
/*
* Find the next vertex to go on the end of augpath.
*/
if (i == 0) {
/* In this special case, we're just looking for L
* vertices that are not yet assigned. */
if (s->dfsstate[i] == nl)
break; /* entire DFS has finished */
L = s->Lorder[s->dfsstate[i]++];
if (s->Llayer[L] != 2*i)
continue; /* skip this vertex */
} else {
/* In the more usual case, we're going through the
* adjacency list for the previous L vertex. */
L = s->augpath[2*i-2];
j = s->dfsstate[i]++;
if (j == adjsizes[L]) {
/* Run out of neighbours of the previous vertex. */
i--;
continue;
}
if (rs && adjsizes[L] - j > 1) {
int which = j + random_upto(rs, adjsizes[L] - j);
int tmp = adjlists[L][which];
adjlists[L][which] = adjlists[L][j];
adjlists[L][j] = tmp;
}
R = adjlists[L][j];
if (s->Rlayer[R] != 2*i-1)
continue; /* skip this vertex */
s->augpath[2*i-1] = R;
s->Rlayer[R] = -1; /* mark vertex as visited */
if (2*i-1 == target_layer) {
/*
* We've found an augmenting path, in the form of
* an even-sized list of vertices alternating
* L,R,...,L,R, with the initial L and final R
* vertex free and otherwise each R currently
* connected to the next L. Adjust so that each L
* connects to the next R, increasing the edge
* count in the matching by 1.
*/
for (j = 0; j < 2*i; j += 2) {
s->LtoR[s->augpath[j]] = s->augpath[j+1];
s->RtoL[s->augpath[j+1]] = s->augpath[j];
}
/*
* Having dealt with that path, and already marked
* all its vertices as visited, rewind right to
* the start and resume our DFS from a new
* starting L-vertex.
*/
i = 0;
continue;
}
L = s->RtoL[R];
if (s->Llayer[L] != 2*i)
continue; /* skip this vertex */
}
s->augpath[2*i] = L;
s->Llayer[L] = -1; /* mark vertex as visited */
i++;
s->dfsstate[i] = 0;
}
}
done:
/*
* Fill in the output arrays.
*/
if (outl) {
for (i = 0; i < nl; i++)
outl[i] = s->LtoR[i];
}
if (outr) {
for (j = 0; j < nr; j++)
outr[j] = s->RtoL[j];
}
/*
* Return the number of matching edges.
*/
for (i = j = 0; i < nl; i++)
if (s->LtoR[i] != -1)
j++;
return j;
}
int matching(int nl, int nr, int **adjlists, int *adjsizes,
random_state *rs, int *outl, int *outr)
{
void *scratch;
int size;
int ret;
size = matching_scratch_size(nl, nr);
scratch = malloc(size);
if (!scratch)
return -1;
ret = matching_with_scratch(scratch, nl, nr, adjlists, adjsizes,
rs, outl, outr);
free(scratch);
return ret;
}
#ifdef STANDALONE_MATCHING_TEST
/*
* Diagnostic routine used in testing this algorithm. It is passed a
* pointer to a piece of scratch space that's just been used by
* matching_with_scratch, and extracts from it a labelling of the
* input graph that acts as a 'witness' to the maximality of the
* returned matching.
*
* The output parameter 'witness' should be an array of (nl+nr)
* integers, indexed such that witness[L] corresponds to an L-vertex (for
* L=0,1,...,nl-1) and witness[nl+R] corresponds to an R-vertex (for
* R=0,1,...,nr-1). On return, this array will assign each vertex a
* label which is either 0 or 1, and the following properties will
* hold:
*
* + all vertices not paired up by the matching are type L0 or R1
* + every L0->R1 edge is used by the matching
* + no L1->R0 edge is used by the matching.
*
* The mere existence of such a labelling is enough to prove the
* maximality of the matching, because if there is any larger matching
* then its symmetric difference with this one must include at least
* one 'augmenting path', which starts at a free L-vertex and ends at
* a free R-vertex, traversing only unused L->R edges and only used
* R->L edges. But that would mean it starts at an L0, ends at an R1,
* and never follows an edge that can get from an 0 to a 1.
*/
static void matching_witness(void *scratchv, int nl, int nr, int *witness)
{
struct scratch *s = (struct scratch *)scratchv;
int i, j;
for (i = 0; i < nl; i++)
witness[i] = s->Llayer[i] == -1;
for (j = 0; j < nr; j++)
witness[nl + j] = s->Rlayer[j] == -1;
}
/*
* Standalone tool to run the matching algorithm.
*/
#include <string.h>
#include <ctype.h>
#include <time.h>
#include "tree234.h"
int nl, nr, count;
int **adjlists, *adjsizes;
int *adjdata, *outl, *outr, *witness;
void *scratch;
random_state *rs;
void allocate(int nl_, int nr_, int maxedges)
{
nl = nl_;
nr = nr_;
adjdata = snewn(maxedges, int);
adjlists = snewn(nl, int *);
adjsizes = snewn(nl, int);
outl = snewn(nl, int);
outr = snewn(nr, int);
witness = snewn(nl+nr, int);
scratch = smalloc(matching_scratch_size(nl, nr));
}
void deallocate(void)
{
sfree(adjlists);
sfree(adjsizes);
sfree(adjdata);
sfree(outl);
sfree(outr);
sfree(witness);
sfree(scratch);
}
void find_and_check_matching(void)
{
int i, j, k;
count = matching_with_scratch(scratch, nl, nr, adjlists, adjsizes,
rs, outl, outr);
matching_witness(scratch, nl, nr, witness);
for (i = j = 0; i < nl; i++) {
if (outl[i] != -1) {
assert(0 <= outl[i] && outl[i] < nr);
assert(outr[outl[i]] == i);
j++;
for (k = 0; k < adjsizes[i]; k++)
if (adjlists[i][k] == outl[i])
break;
assert(k < adjsizes[i]);
}
}
assert(j == count);
for (i = j = 0; i < nr; i++) {
if (outr[i] != -1) {
assert(0 <= outr[i] && outr[i] < nl);
assert(outl[outr[i]] == i);
j++;
}
}
assert(j == count);
for (i = 0; i < nl; i++) {
if (outl[i] == -1)
assert(witness[i] == 0);
}
for (i = 0; i < nr; i++) {
if (outr[i] == -1)
assert(witness[nl+i] == 1);
}
for (i = 0; i < nl; i++) {
for (j = 0; j < adjsizes[i]; j++) {
k = adjlists[i][j];
if (outl[i] == k)
assert(!(witness[i] == 1 && witness[nl+k] == 0));
else
assert(!(witness[i] == 0 && witness[nl+k] == 1));
}
}
}
struct nodename {
const char *name;
int index;
};
int compare_nodes(void *av, void *bv)
{
const struct nodename *a = (const struct nodename *)av;
const struct nodename *b = (const struct nodename *)bv;
return strcmp(a->name, b->name);
}
int node_index(tree234 *n2i, tree234 *i2n, const char *name)
{
struct nodename *nn, *nn_prev;
char *namedup = dupstr(name);
nn = snew(struct nodename);
nn->name = namedup;
nn->index = count234(n2i);
nn_prev = add234(n2i, nn);
if (nn_prev != nn) {
sfree(nn);
sfree(namedup);
} else {
addpos234(i2n, nn, nn->index);
}
return nn_prev->index;
}
struct edge {
int L, R;
};
int compare_edges(void *av, void *bv)
{
const struct edge *a = (const struct edge *)av;
const struct edge *b = (const struct edge *)bv;
if (a->L < b->L) return -1;
if (a->L > b->L) return +1;
if (a->R < b->R) return -1;
if (a->R > b->R) return +1;
return 0;
}
void matching_from_user_input(FILE *fp, const char *filename)
{
tree234 *Ln2i, *Li2n, *Rn2i, *Ri2n, *edges;
char *line = NULL;
struct edge *e;
int i, lineno = 0;
int *adjptr;
Ln2i = newtree234(compare_nodes);
Rn2i = newtree234(compare_nodes);
Li2n = newtree234(NULL);
Ri2n = newtree234(NULL);
edges = newtree234(compare_edges);
while (sfree(line), lineno++, (line = fgetline(fp)) != NULL) {
char *p, *Lname, *Rname;
p = line;
while (*p && isspace((unsigned char)*p)) p++;
if (!*p)
continue;
Lname = p;
while (*p && !isspace((unsigned char)*p)) p++;
if (*p)
*p++ = '\0';
while (*p && isspace((unsigned char)*p)) p++;
if (!*p) {
fprintf(stderr, "%s:%d: expected 2 words, found 1\n",
filename, lineno);
exit(1);
}
Rname = p;
while (*p && !isspace((unsigned char)*p)) p++;
if (*p)
*p++ = '\0';
while (*p && isspace((unsigned char)*p)) p++;
if (*p) {
fprintf(stderr, "%s:%d: expected 2 words, found more\n",
filename, lineno);
exit(1);
}
e = snew(struct edge);
e->L = node_index(Ln2i, Li2n, Lname);
e->R = node_index(Rn2i, Ri2n, Rname);
if (add234(edges, e) != e) {
fprintf(stderr, "%s:%d: duplicate edge\n",
filename, lineno);
exit(1);
}
}
allocate(count234(Ln2i), count234(Rn2i), count234(edges));
adjptr = adjdata;
for (i = 0; i < nl; i++)
adjlists[i] = NULL;
for (i = 0; (e = index234(edges, i)) != NULL; i++) {
if (!adjlists[e->L])
adjlists[e->L] = adjptr;
*adjptr++ = e->R;
adjsizes[e->L] = adjptr - adjlists[e->L];
}
find_and_check_matching();
for (i = 0; i < nl; i++) {
if (outl[i] != -1) {
struct nodename *Lnn = index234(Li2n, i);
struct nodename *Rnn = index234(Ri2n, outl[i]);
printf("%s %s\n", Lnn->name, Rnn->name);
}
}
}
void test_subsets(void)
{
int b = 8;
int n = 1 << b;
int i, j, nruns, expected_size;
int *adjptr;
int *edgecounts;
struct stats {
int min, max;
double n, sx, sxx;
} *stats;
static const char seed[] = "fixed random seed for repeatability";
/*
* Generate a graph in which every subset of [b] = {1,...,b}
* (represented as a b-bit integer 0 <= i < n) has an edge going
* to every subset obtained by removing exactly one element.
*
* This graph is the disjoint union of the corresponding graph for
* each layer (collection of same-sized subset) of the power set
* of [b]. Each of those graphs has a matching of size equal to
* the smaller of its vertex sets. So we expect the overall size
* of the output matching to be less than n by the size of the
* largest layer, that is, to be n - binomial(n, floor(n/2)).
*
* We run the generation repeatedly, randomising it every time,
* and we expect to see every possible edge appear sooner or
* later.
*/
rs = random_new(seed, strlen(seed));
allocate(n, n, n*b);
adjptr = adjdata;
expected_size = 0;
for (i = 0; i < n; i++) {
adjlists[i] = adjptr;
for (j = 0; j < b; j++) {
if (i & (1 << j))
*adjptr++ = i & ~(1 << j);
}
adjsizes[i] = adjptr - adjlists[i];
if (adjsizes[i] != b/2)
expected_size++;
}
edgecounts = snewn(n*b, int);
for (i = 0; i < n*b; i++)
edgecounts[i] = 0;
stats = snewn(b, struct stats);
nruns = 0;
while (nruns < 10000) {
nruns++;
find_and_check_matching();
assert(count == expected_size);
for (i = 0; i < n; i++)
for (j = 0; j < b; j++)
if ((i ^ outl[i]) == (1 << j))
edgecounts[b*i+j]++;
if (nruns % 1000 == 0) {
for (i = 0; i < b; i++) {
struct stats *st = &stats[i];
st->min = st->max = -1;
st->n = st->sx = st->sxx = 0;
}
for (i = 0; i < n; i++) {
int pop = 0;
for (j = 0; j < b; j++)
if (i & (1 << j))
pop++;
pop--;
for (j = 0; j < b; j++) {
if (i & (1 << j)) {
struct stats *st = &stats[pop];
int x = edgecounts[b*i+j];
if (st->max == -1 || st->max < x)
st->max = x;
if (st->min == -1 || st->min > x)
st->min = x;
st->n++;
st->sx += x;
st->sxx += (double)x*x;
} else {
assert(edgecounts[b*i+j] == 0);
}
}
}
}
}
printf("after %d runs:\n", nruns);
for (j = 0; j < b; j++) {
struct stats *st = &stats[j];
printf("edges between layers %d,%d:"
" min=%d max=%d mean=%f variance=%f\n",
j, j+1, st->min, st->max, st->sx/st->n,
(st->sxx - st->sx*st->sx/st->n) / st->n);
}
}
int main(int argc, char **argv)
{
static const char stdin_identifier[] = "<standard input>";
const char *infile = NULL;
bool doing_opts = true;
enum { USER_INPUT, AUTOTEST } mode = USER_INPUT;
while (--argc > 0) {
const char *arg = *++argv;
if (doing_opts && arg[0] == '-' && arg[1]) {
if (!strcmp(arg, "--")) {
doing_opts = false;
} else if (!strcmp(arg, "--random")) {
char buf[64];
int len = sprintf(buf, "%lu", (unsigned long)time(NULL));
rs = random_new(buf, len);
} else if (!strcmp(arg, "--autotest")) {
mode = AUTOTEST;
} else {
fprintf(stderr, "matching: unrecognised option '%s'\n", arg);
return 1;
}
} else {
if (!infile) {
infile = (!strcmp(arg, "-") ? stdin_identifier : arg);
} else {
fprintf(stderr, "matching: too many arguments\n");
return 1;
}
}
}
if (mode == USER_INPUT) {
FILE *fp;
if (!infile)
infile = stdin_identifier;
if (infile != stdin_identifier) {
fp = fopen(infile, "r");
if (!fp) {
fprintf(stderr, "matching: could not open input file '%s'\n",
infile);
return 1;
}
} else {
fp = stdin;
}
matching_from_user_input(fp, infile);
if (infile != stdin_identifier)
fclose(fp);
}
if (mode == AUTOTEST) {
if (infile) {
fprintf(stderr, "matching: expected no filename argument "
"with --autotest\n");
return 1;
}
test_subsets();
}
return 0;
}
#endif /* STANDALONE_MATCHING_TEST */