Consensus diff implementation, including both the generation and the application of diffs in a minimal ed format. More...

#include "or.h"
#include "consdiff.h"
#include "memarea.h"
#include "routerparse.h"

Include dependency graph for consdiff.c:

Data Structures
struct	router_id_iterator_t

Macros
#define	CONSDIFF_PRIVATE

#define	NOT_VALID_BASE64 255

#define	X NOT_VALID_BASE64

#define	SP NOT_VALID_BASE64

#define	PAD NOT_VALID_BASE64

#define	ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }

#define	START_OF_SIGNATURES_SECTION "directory-signature "

#define	MAX_LINE_COUNT (10000)

#define	CONSENSUS_LINE_MAX_LEN (1<<20)

Typedefs
typedef struct router_id_iterator_t	router_id_iterator_t

Functions
STATIC int	lines_eq (const cdline_t a, const cdline_t b)

STATIC int	line_str_eq (const cdline_t a, const char b)

STATIC void	smartlist_add_linecpy (smartlist_t lst, memarea_t area, const char *s)

	MOCK_IMPL (STATIC int, consensus_compute_digest,(const char cons, consensus_digest_t digest_out))

	MOCK_IMPL (STATIC int, consensus_compute_digest_as_signed,(const char cons, consensus_digest_t digest_out))

	MOCK_IMPL (STATIC int, consensus_digest_eq,(const uint8_t d1, const uint8_t d2))

STATIC smartlist_slice_t *	smartlist_slice (const smartlist_t *list, int start, int end)

STATIC int *	lcs_lengths (const smartlist_slice_t slice1, const smartlist_slice_t slice2, int direction)

STATIC void	trim_slices (smartlist_slice_t slice1, smartlist_slice_t slice2)

STATIC int	smartlist_slice_string_pos (const smartlist_slice_t slice, const cdline_t string)

STATIC void	set_changed (bitarray_t changed1, bitarray_t changed2, const smartlist_slice_t slice1, const smartlist_slice_t slice2)

STATIC void	calc_changes (smartlist_slice_t slice1, smartlist_slice_t slice2, bitarray_t changed1, bitarray_t changed2)

STATIC int	get_id_hash (const cdline_t line, cdline_t hash_out)

STATIC int	is_valid_router_entry (const cdline_t *line)

STATIC int	next_router (const smartlist_t *cons, int cur)

STATIC int	base64cmp (const cdline_t hash1, const cdline_t hash2)

STATIC smartlist_t *	gen_ed_diff (const smartlist_t cons1_orig, const smartlist_t cons2, memarea_t *area)

STATIC smartlist_t *	apply_ed_diff (const smartlist_t cons1, const smartlist_t diff, int diff_starting_line)

smartlist_t *	consdiff_gen_diff (const smartlist_t cons1, const smartlist_t cons2, const consensus_digest_t digests1, const consensus_digest_t digests2, memarea_t *area)

int	consdiff_get_digests (const smartlist_t diff, char digest1_out, char *digest2_out)

char *	consdiff_apply_diff (const smartlist_t cons1, const smartlist_t diff, const consensus_digest_t *digests1)

STATIC int	consensus_split_lines (smartlist_t out, const char s, memarea_t *area)

char *	consensus_diff_generate (const char cons1, const char cons2)

char *	consensus_diff_apply (const char consensus, const char diff)

int	looks_like_a_consensus_diff (const char *document, size_t len)

Detailed Description

Consensus diff implementation, including both the generation and the application of diffs in a minimal ed format.

The consensus diff application is done in consdiff_apply_diff, which relies on apply_ed_diff for the main ed diff part and on some digest helper functions to check the digest hashes found in the consensus diff header.

The consensus diff generation is more complex. consdiff_gen_diff generates it, relying on gen_ed_diff to generate the ed diff and some digest helper functions to generate the digest hashes.

gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic time and linear space to generate an ed diff given two smartlists. As shown in its comment section, calling calc_changes on the entire two consensuses will calculate what is to be added and what is to be deleted in the diff. Its comment section briefly explains how it works.

In our case specific to consensuses, we take advantage of the fact that consensuses list routers sorted by their identities. We use that information to avoid running calc_changes on the whole smartlists. gen_ed_diff will navigate through the two consensuses identity by identity and will send small couples of slices to calc_changes, keeping the running time near-linear. This is explained in more detail in the gen_ed_diff comments.

The allocation strategy tries to save time and memory by avoiding needless copies. Instead of actually splitting the inputs into separate strings, we allocate cdline_t objects, each of which represents a line in the original object or in the output. We use memarea_t allocators to manage the temporary memory we use when generating or applying diffs.

Macro Definition Documentation

◆ CONSENSUS_LINE_MAX_LEN

#define CONSENSUS_LINE_MAX_LEN (1<<20)

Any consensus line longer than this means that the input is invalid.

◆ ROUTER_ID_ITERATOR_INIT

#define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }

Initializer for a router_id_iterator_t.

◆ START_OF_SIGNATURES_SECTION

#define START_OF_SIGNATURES_SECTION "directory-signature "

Line-prefix indicating the beginning of the signatures section that we intend to delete.

Typedef Documentation

◆ router_id_iterator_t

typedef struct router_id_iterator_t router_id_iterator_t

Structure used to remember the previous and current identity hash of the "r " lines in a consensus, to enforce well-ordering.

Function Documentation

◆ apply_ed_diff()

STATIC smartlist_t* apply_ed_diff	(	const smartlist_t *	cons1,
		const smartlist_t *	diff,
		int	diff_starting_line
	)

Apply the ed diff, starting at diff_starting_line, to the consensus and return a new consensus, also as a line-based smartlist. Will return NULL if the ed diff is not properly formatted.

All cdline_t objects in the resulting object are references to lines in one of the inputs; nothing is copied.

$ is not allowed with non-d actions.

Here is the caller graph for this function:

◆ base64cmp()

STATIC int base64cmp	(	const cdline_t *	hash1,
		const cdline_t *	hash2
	)

Helper: compare two base64-encoded identity hashes, which may be of different lengths. Comparison ends when the first non-base64 char is found.

◆ calc_changes()

STATIC void calc_changes	(	smartlist_slice_t *	slice1,
		smartlist_slice_t *	slice2,
		bitarray_t *	changed1,
		bitarray_t *	changed2
	)

Helper: Figure out what elements are new or gone on the second smartlist relative to the first smartlist, and store the booleans in the bitarrays. True on the first bitarray means the element is gone, true on the second bitarray means it's new.

In its base case, either of the smartlists is of length <= 1 and we can quickly see what elements are new or are gone. In the other case, we will split one smartlist by half and we'll use optimal_column_to_split to find the optimal column at which to split the second smartlist so that we are finding the smallest diff possible.

Here is the call graph for this function:

◆ consdiff_apply_diff()

char* consdiff_apply_diff	(	const smartlist_t *	cons1,
		const smartlist_t *	diff,
		const consensus_digest_t *	digests1
	)

Apply the consensus diff to the given consensus and return a new consensus, also as a line-based smartlist. Will return NULL if the diff could not be applied. Neither the consensus nor the diff are modified in any way, so it's up to the caller to free their resources.

Here is the call graph for this function:

◆ consdiff_gen_diff()

smartlist_t* consdiff_gen_diff	(	const smartlist_t *	cons1,
		const smartlist_t *	cons2,
		const consensus_digest_t *	digests1,
		const consensus_digest_t *	digests2,
		memarea_t *	area
	)

Generate a consensus diff as a smartlist from two given consensuses, also as smartlists. Will return NULL if the consensus diff could not be generated. Neither of the two consensuses are modified in any way, so it's up to the caller to free their resources.

Here is the call graph for this function:

◆ consdiff_get_digests()

int consdiff_get_digests	(	const smartlist_t *	diff,
		char *	digest1_out,
		char *	digest2_out
	)

Fetch the digest of the base consensus in the consensus diff, encoded in base16 as found in the diff itself. digest1_out and digest2_out must be of length DIGEST256_LEN or larger if not NULL.

Here is the caller graph for this function:

◆ consensus_diff_apply()

char* consensus_diff_apply	(	const char *	consensus,
		const char *	diff
	)

Given a consensus document and a diff, try to apply the diff to the consensus. On success return a newly allocated string containing the new consensus. On failure, return NULL.

Here is the call graph for this function:

◆ consensus_diff_generate()

char* consensus_diff_generate	(	const char *	cons1,
		const char *	cons2
	)

Given two consensus documents, try to compute a diff between them. On success, retun a newly allocated string containing that diff. On failure, return NULL.

◆ consensus_split_lines()

STATIC int consensus_split_lines	(	smartlist_t *	out,
		const char *	s,
		memarea_t *	area
	)

Helper: For every NL-terminated line in s, add a cdline referring to that line (without trailing newline) to out. Return -1 if there are any non-NL terminated lines; 0 otherwise.

Unlike tor_split_lines, this function avoids ambiguity on its handling of a final line that isn't NL-terminated.

All cdline_t objects are allocated in the provided memarea. Strings are not copied: if s changes or becomes invalid, then all generated cdlines will become invalid.

Here is the call graph for this function:

◆ gen_ed_diff()

STATIC smartlist_t* gen_ed_diff	(	const smartlist_t *	cons1_orig,
		const smartlist_t *	cons2,
		memarea_t *	area
	)

Generate an ed diff as a smartlist from two consensuses, also given as smartlists. Will return NULL if the diff could not be generated, which can happen if any lines the script had to add matched "." or if the routers were not properly ordered.

All cdline_t objects in the resulting object are either references to lines in one of the inputs, or are newly allocated lines in the provided memarea.

This implementation is consensus-specific. To generate an ed diff for any given input in quadratic time, you can replace all the code until the navigation in reverse order with the following:

int len1 = smartlist_len(cons1); int len2 = smartlist_len(cons2); bitarray_t *changed1 = bitarray_init_zero(len1); bitarray_t *changed2 = bitarray_init_zero(len2); cons1_sl = smartlist_slice(cons1, 0, -1); cons2_sl = smartlist_slice(cons2, 0, -1); calc_changes(cons1_sl, cons2_sl, changed1, changed2);

Here is the call graph for this function:

Here is the caller graph for this function:

◆ get_id_hash()

STATIC int get_id_hash	(	const cdline_t *	line,
		cdline_t *	hash_out
	)

Helper: Get the identity hash from a router line, assuming that the line at least appears to be a router line and thus starts with "r ".

If an identity hash is found, store it (without decoding it) in hash_out, and return 0. On failure, return -1.

◆ is_valid_router_entry()

STATIC int is_valid_router_entry ( const cdline_t * line )

Helper: Check that a line is a valid router entry. We must at least be able to fetch a proper identity hash from it for it to be valid.

Here is the caller graph for this function:

◆ lcs_lengths()

STATIC int* lcs_lengths	(	const smartlist_slice_t *	slice1,
		const smartlist_slice_t *	slice2,
		int	direction
	)

Helper: Compute the longest common subsequence lengths for the two slices. Used as part of the diff generation to find the column at which to split slice2 while still having the optimal solution. If direction is -1, the navigation is reversed. Otherwise it must be 1. The length of the resulting integer array is that of the second slice plus one.

◆ line_str_eq()

STATIC int line_str_eq	(	const cdline_t *	a,
		const char *	b
	)

Return true iff a has the same contents as the nul-terminated string b.

Here is the call graph for this function:

◆ lines_eq()

STATIC int lines_eq	(	const cdline_t *	a,
		const cdline_t *	b
	)

Return true iff a and b have the same contents.

Here is the caller graph for this function:

◆ looks_like_a_consensus_diff()

int looks_like_a_consensus_diff	(	const char *	document,
		size_t	len
	)

Return true iff, based on its header, document is likely to be a consensus diff.

Here is the caller graph for this function:

◆ MOCK_IMPL() [1/3]

MOCK_IMPL	(	STATIC	int,
		consensus_compute_digest	,
		(const char cons, consensus_digest_t digest_out)
	)

Compute the digest of cons, and store the result in digest_out. Return 0 on success, -1 on failure.

Here is the call graph for this function:

◆ MOCK_IMPL() [2/3]

MOCK_IMPL	(	STATIC	int,
		consensus_compute_digest_as_signed	,
		(const char cons, consensus_digest_t digest_out)
	)

Compute the digest-as-signed of cons, and store the result in digest_out. Return 0 on success, -1 on failure.

Here is the call graph for this function:

◆ MOCK_IMPL() [3/3]

MOCK_IMPL	(	STATIC	int,
		consensus_digest_eq	,
		(const uint8_t d1, const uint8_t d2)
	)

Return true iff d1 and d2 contain the same digest

◆ next_router()

STATIC int next_router	(	const smartlist_t *	cons,
		int	cur
	)

Helper: Find the next router line starting at the current position. Assumes that cur is lower than the length of the smartlist, i.e. it is a line within the bounds of the consensus. The only exception is when we don't want to skip the first line, in which case cur will be -1.

Here is the call graph for this function:

◆ set_changed()

STATIC void set_changed	(	bitarray_t *	changed1,
		bitarray_t *	changed2,
		const smartlist_slice_t *	slice1,
		const smartlist_slice_t *	slice2
	)

Helper: Set all the appropriate changed booleans to true. The first slice must be of length 0 or 1. All the lines of slice1 and slice2 which are not present in the other slice will be set to changed in their bool array. The two changed bool arrays are passed in the same order as the slices.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ smartlist_add_linecpy()

STATIC void smartlist_add_linecpy	(	smartlist_t *	lst,
		memarea_t *	area,
		const char *	s
	)

Add a cdline_t to lst holding as its contents the nul-terminated string s. Use the provided memory area for storage.

Here is the call graph for this function:

◆ smartlist_slice()

STATIC smartlist_slice_t* smartlist_slice	(	const smartlist_t *	list,
		int	start,
		int	end
	)

Create (allocate) a new slice from a smartlist. Assumes that the start and the end indexes are within the bounds of the initial smartlist. The end element is not part of the resulting slice. If end is -1, the slice is to reach the end of the smartlist.

Here is the caller graph for this function:

◆ smartlist_slice_string_pos()

STATIC int smartlist_slice_string_pos	(	const smartlist_slice_t *	slice,
		const cdline_t *	string
	)

Like smartlist_string_pos, but uses a cdline_t, and is restricted to the bounds of the slice.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ trim_slices()

STATIC void trim_slices	(	smartlist_slice_t *	slice1,
		smartlist_slice_t *	slice2
	)

Helper: Trim any number of lines that are equally at the start or the end of both slices.

Here is the call graph for this function:

Here is the caller graph for this function:

Data Structures

Macros

Typedefs

Functions

Detailed Description

Macro Definition Documentation

◆ CONSENSUS_LINE_MAX_LEN

◆ ROUTER_ID_ITERATOR_INIT

◆ START_OF_SIGNATURES_SECTION

Typedef Documentation

◆ router_id_iterator_t

Function Documentation

◆ apply_ed_diff()

◆ base64cmp()

◆ calc_changes()

◆ consdiff_apply_diff()

◆ consdiff_gen_diff()

◆ consdiff_get_digests()

◆ consensus_diff_apply()

◆ consensus_diff_generate()

◆ consensus_split_lines()

◆ gen_ed_diff()

◆ get_id_hash()

◆ is_valid_router_entry()

◆ lcs_lengths()

◆ line_str_eq()

◆ lines_eq()

◆ looks_like_a_consensus_diff()

◆ MOCK_IMPL() [1/3]

◆ MOCK_IMPL() [2/3]

◆ MOCK_IMPL() [3/3]

◆ next_router()

◆ set_changed()

◆ smartlist_add_linecpy()

◆ smartlist_slice()

◆ smartlist_slice_string_pos()

◆ trim_slices()