tor  master
Data Structures | Macros | Typedefs | Functions
geoip.c File Reference

Functions related to maintaining an IP-to-country database; to summarizing client connections by country to entry guards, bridges, and directory servers; and for statistics on answering network status requests. More...

#include "or.h"
#include "ht.h"
#include "buffers.h"
#include "config.h"
#include "control.h"
#include "dnsserv.h"
#include "dos.h"
#include "geoip.h"
#include "routerlist.h"
Include dependency graph for geoip.c:

Data Structures

struct  geoip_ipv4_entry_t
 
struct  geoip_ipv6_entry_t
 
struct  geoip_country_t
 
struct  c_hist_t
 
struct  dirreq_map_entry_t
 

Macros

#define GEOIP_PRIVATE
 
#define MAX_LAST_SEEN_IN_MINUTES   0X3FFFFFFFu
 
#define clientmap_entry_free(ent)   FREE_AND_NULL(clientmap_entry_t, clientmap_entry_free_, ent)
 
#define GEOIP_CLIENT_CACHE_OOM_MIN_CUTOFF   (4 * 60 * 60)
 
#define GEOIP_CLIENT_CACHE_OOM_STEP   (15 * 50)
 
#define MIN_IPS_TO_NOTE_COUNTRY   1
 
#define MIN_IPS_TO_NOTE_ANYTHING   1
 
#define IP_GRANULARITY   8
 
#define DIRREQ_TIMEOUT   (10*60)
 
#define DIR_REQ_GRANULARITY   4
 
#define MIN_DIR_REQ_RESPONSES   16
 
#define RESPONSE_GRANULARITY   8
 

Typedefs

typedef struct geoip_ipv4_entry_t geoip_ipv4_entry_t
 
typedef struct geoip_ipv6_entry_t geoip_ipv6_entry_t
 
typedef struct geoip_country_t geoip_country_t
 
typedef struct c_hist_t c_hist_t
 
typedef struct dirreq_map_entry_t dirreq_map_entry_t
 

Functions

 MOCK_IMPL (country_t, geoip_get_country,(const char *country))
 
STATIC int geoip_parse_entry (const char *line, sa_family_t family)
 
int should_record_bridge_info (const or_options_t *options)
 
int geoip_load_file (sa_family_t family, const char *filename)
 
STATIC int geoip_get_country_by_ipv4 (uint32_t ipaddr)
 
STATIC int geoip_get_country_by_ipv6 (const struct in6_addr *addr)
 
 MOCK_IMPL (int, geoip_get_country_by_addr,(const tor_addr_t *addr))
 
 MOCK_IMPL (int, geoip_get_n_countries,(void))
 
const char * geoip_get_country_name (country_t num)
 
 MOCK_IMPL (int, geoip_is_loaded,(sa_family_t family))
 
const char * geoip_db_digest (sa_family_t family)
 
 HT_PROTOTYPE (clientmap, clientmap_entry_t, node, clientmap_entry_hash, clientmap_entries_eq)
 
void geoip_note_client_seen (geoip_client_action_t action, const tor_addr_t *addr, const char *transport_name, time_t now)
 
void geoip_remove_old_clients (time_t cutoff)
 
clientmap_entry_t * geoip_lookup_client (const tor_addr_t *addr, const char *transport_name, geoip_client_action_t action)
 
size_t geoip_client_cache_handle_oom (time_t now, size_t min_remove_bytes)
 
size_t geoip_client_cache_total_allocation (void)
 
void geoip_note_ns_response (geoip_ns_response_t response)
 
void geoip_start_dirreq (uint64_t dirreq_id, size_t response_size, dirreq_type_t type)
 
void geoip_change_dirreq_state (uint64_t dirreq_id, dirreq_type_t type, dirreq_state_t new_state)
 
char * geoip_get_transport_history (void)
 
int geoip_get_client_history (geoip_client_action_t action, char **country_str, char **ipver_str)
 
char * geoip_get_request_history (void)
 
void geoip_dirreq_stats_init (time_t now)
 
void geoip_reset_dirreq_stats (time_t now)
 
void geoip_dirreq_stats_term (void)
 
char * geoip_format_dirreq_stats (time_t now)
 
time_t geoip_dirreq_stats_write (time_t now)
 
void geoip_bridge_stats_init (time_t now)
 
void geoip_bridge_stats_term (void)
 
char * geoip_format_bridge_stats (time_t now)
 
char * format_client_stats_heartbeat (time_t now)
 
time_t geoip_bridge_stats_write (time_t now)
 
const char * geoip_get_bridge_stats_extrainfo (time_t now)
 
char * geoip_get_bridge_stats_controller (time_t now)
 
void geoip_entry_stats_init (time_t now)
 
void geoip_reset_entry_stats (time_t now)
 
void geoip_entry_stats_term (void)
 
char * geoip_format_entry_stats (time_t now)
 
time_t geoip_entry_stats_write (time_t now)
 
int getinfo_helper_geoip (control_connection_t *control_conn, const char *question, char **answer, const char **errmsg)
 
STATIC void clear_geoip_db (void)
 
void geoip_free_all (void)
 

Detailed Description

Functions related to maintaining an IP-to-country database; to summarizing client connections by country to entry guards, bridges, and directory servers; and for statistics on answering network status requests.

There are two main kinds of functions in this module: geoip functions, which map groups of IPv4 and IPv6 addresses to country codes, and statistical functions, which collect statistics about different kinds of per-country usage.

The geoip lookup tables are implemented as sorted lists of disjoint address ranges, each mapping to a singleton geoip_country_t. These country objects are also indexed by their names in a hashtable.

The tables are populated from disk at startup by the geoip_load_file() function. For more information on the file format they read, see that function. See the scripts and the README file in src/config for more information about how those files are generated.

Tor uses GeoIP information in order to implement user requests (such as ExcludeNodes {cc}), and to keep track of how much usage relays are getting for each country.

Macro Definition Documentation

◆ DIRREQ_TIMEOUT

#define DIRREQ_TIMEOUT   (10*60)

When there are incomplete directory requests at the end of a 24-hour period, consider those requests running for longer than this timeout as failed, the others as still running.

◆ IP_GRANULARITY

#define IP_GRANULARITY   8

When reporting geoip data about countries, round up to the nearest multiple of this value.

◆ MAX_LAST_SEEN_IN_MINUTES

#define MAX_LAST_SEEN_IN_MINUTES   0X3FFFFFFFu

Largest allowable value for last_seen_in_minutes. (It's a 30-bit field, so it can hold up to (1u<<30)-1, or 0x3fffffffu.)

◆ MIN_IPS_TO_NOTE_ANYTHING

#define MIN_IPS_TO_NOTE_ANYTHING   1

Do not report any geoip data at all if we have fewer than this number of IPs to report about.

◆ MIN_IPS_TO_NOTE_COUNTRY

#define MIN_IPS_TO_NOTE_COUNTRY   1

Do not mention any country from which fewer than this number of IPs have connected. This conceivably avoids reporting information that could deanonymize users, though analysis is lacking.

Typedef Documentation

◆ c_hist_t

typedef struct c_hist_t c_hist_t

Helper type: used to sort per-country totals by value.

◆ dirreq_map_entry_t

Entry in a map from either chan->global_identifier for direct requests or a unique circuit identifier for tunneled requests to request time, response size, and completion time of a network status request. Used to measure download times of requests to derive average client bandwidths.

◆ geoip_country_t

A per-country record for GeoIP request history.

◆ geoip_ipv4_entry_t

An entry from the GeoIP IPv4 file: maps an IPv4 range to a country.

◆ geoip_ipv6_entry_t

An entry from the GeoIP IPv6 file: maps an IPv6 range to a country.

Function Documentation

◆ clear_geoip_db()

STATIC void clear_geoip_db ( void  )

Release all storage held by the GeoIP databases and country list.

◆ format_client_stats_heartbeat()

char* format_client_stats_heartbeat ( time_t  now)

Return a newly allocated string holding our bridge usage stats by country in a format suitable for inclusion in our heartbeat message. Return NULL on failure.

Here is the call graph for this function:

◆ geoip_bridge_stats_init()

void geoip_bridge_stats_init ( time_t  now)

Initialize bridge stats.

◆ geoip_bridge_stats_term()

void geoip_bridge_stats_term ( void  )

Stop collecting bridge stats in a way that we can re-start doing so in geoip_bridge_stats_init().

◆ geoip_bridge_stats_write()

time_t geoip_bridge_stats_write ( time_t  now)

Write bridge statistics to $DATADIR/stats/bridge-stats and return when we should next try to write statistics.

◆ geoip_change_dirreq_state()

void geoip_change_dirreq_state ( uint64_t  dirreq_id,
dirreq_type_t  type,
dirreq_state_t  new_state 
)

Change the state of the directory request identified by dirreq_id, which is either direct or tunneled (see type), to new_state and possibly mark it as completed. If no entry can be found for the given key parts (e.g., if this is a directory request that we are not measuring, or one that was started in the previous measurement period), or if the state cannot be advanced to new_state, do nothing.

Here is the caller graph for this function:

◆ geoip_db_digest()

const char* geoip_db_digest ( sa_family_t  family)

Return the hex-encoded SHA1 digest of the loaded GeoIP file. The result does not need to be deallocated, but will be overwritten by the next call of hex_str().

Here is the call graph for this function:

◆ geoip_dirreq_stats_init()

void geoip_dirreq_stats_init ( time_t  now)

Initialize directory request stats.

◆ geoip_dirreq_stats_term()

void geoip_dirreq_stats_term ( void  )

Stop collecting directory request stats in a way that we can re-start doing so in geoip_dirreq_stats_init().

Here is the call graph for this function:

◆ geoip_dirreq_stats_write()

time_t geoip_dirreq_stats_write ( time_t  now)

If 24 hours have passed since the beginning of the current dirreq stats period, write dirreq stats to $DATADIR/stats/dirreq-stats (possibly overwriting an existing file) and reset counters. Return when we would next want to write dirreq stats or 0 if we never want to write.

◆ geoip_entry_stats_init()

void geoip_entry_stats_init ( time_t  now)

Initialize entry stats.

◆ geoip_entry_stats_term()

void geoip_entry_stats_term ( void  )

Stop collecting entry stats in a way that we can re-start doing so in geoip_entry_stats_init().

Here is the call graph for this function:

◆ geoip_entry_stats_write()

time_t geoip_entry_stats_write ( time_t  now)

If 24 hours have passed since the beginning of the current entry stats period, write entry stats to $DATADIR/stats/entry-stats (possibly overwriting an existing file) and reset counters. Return when we would next want to write entry stats or 0 if we never want to write.

◆ geoip_format_bridge_stats()

char* geoip_format_bridge_stats ( time_t  now)

Return a newly allocated string holding our bridge usage stats by country in a format suitable for inclusion in an extrainfo document. Return NULL on failure.

◆ geoip_format_dirreq_stats()

char* geoip_format_dirreq_stats ( time_t  now)

Return a newly allocated string containing the dirreq statistics until now, or NULL if we're not collecting dirreq stats. Caller must ensure start_of_dirreq_stats_interval is in the past.

◆ geoip_format_entry_stats()

char* geoip_format_entry_stats ( time_t  now)

Return a newly allocated string containing the entry statistics until now, or NULL if we're not collecting entry stats. Caller must ensure start_of_entry_stats_interval lies in the past.

◆ geoip_free_all()

void geoip_free_all ( void  )

Release all storage held in this file.

◆ geoip_get_bridge_stats_controller()

char* geoip_get_bridge_stats_controller ( time_t  now)

Return a new string containing the recent bridge statistics to be returned to controller clients, or NULL if we don't have any bridge statistics.

◆ geoip_get_bridge_stats_extrainfo()

const char* geoip_get_bridge_stats_extrainfo ( time_t  now)

Return most recent bridge statistics for inclusion in extra-info descriptors, or NULL if we don't have recent bridge statistics.

◆ geoip_get_client_history()

int geoip_get_client_history ( geoip_client_action_t  action,
char **  country_str,
char **  ipver_str 
)

Store a newly allocated comma-separated string in *country_str containing entries for all the countries from which we've seen enough clients connect as a bridge, directory server, or entry guard. The entry format is cc=num where num is the number of IPs we've seen connecting from that country, and cc is a lowercased country code. *country_str is set to NULL if we're not ready to export per country data yet.

Store a newly allocated comma-separated string in *ipver_str containing entries for clients connecting over IPv4 and IPv6. The format is family=num where num is the number of IPs we've seen connecting over that protocol family, and family is 'v4' or 'v6'.

Return 0 on success and -1 if we're missing geoip data.

unresolved requests are stored at index 0.

◆ geoip_get_country_by_ipv4()

STATIC int geoip_get_country_by_ipv4 ( uint32_t  ipaddr)

Given an IP address in host order, return a number representing the country to which that address belongs, -1 for "No geoip information available", or 0 for the 'unknown country'. The return value will always be less than geoip_get_n_countries(). To decode it, call geoip_get_country_name().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ geoip_get_country_by_ipv6()

STATIC int geoip_get_country_by_ipv6 ( const struct in6_addr *  addr)

Given an IPv6 address, return a number representing the country to which that address belongs, -1 for "No geoip information available", or 0 for the 'unknown country'. The return value will always be less than geoip_get_n_countries(). To decode it, call geoip_get_country_name().

◆ geoip_get_country_name()

const char* geoip_get_country_name ( country_t  num)

Return the two-letter country code associated with the number num, or "??" for an unknown value.

◆ geoip_get_request_history()

char* geoip_get_request_history ( void  )

Return a newly allocated string holding the per-country request history for v3 network statuses in a format suitable for an extra-info document, or NULL on failure.

◆ geoip_get_transport_history()

char* geoip_get_transport_history ( void  )

Return the bridge-ip-transports string that should be inserted in our extra-info descriptor. Return NULL if the bridge-ip-transports line should be empty.

String hash table (name of transport) -> (number of users).

Smartlist that contains copies of the names of the transports that have been used.

We do the following steps to form the transport history string: a) Foreach client that uses a pluggable transport, we increase the times that transport was used by one. If the client did not use a transport, we increase the number of times someone connected without obfuscation. b) Foreach transport we observed, we write its transport history string and push it to string_chunks. So, for example, if we've seen 665 obfs2 clients, we write "obfs2=665". c) We concatenate string_chunks to form the final string.

◆ geoip_load_file()

int geoip_load_file ( sa_family_t  family,
const char *  filename 
)

Clear appropriate GeoIP database, based on family, and reload it from the file filename. Return 0 on success, -1 on failure.

Recognized line formats for IPv4 are: INTIPLOW,INTIPHIGH,CC and "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME" where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned integers, and CC is a country code.

Recognized line format for IPv6 is: IPV6LOW,IPV6HIGH,CC where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.

It also recognizes, and skips over, blank lines and lines that start with '#' (comments).

Here is the call graph for this function:

◆ geoip_note_client_seen()

void geoip_note_client_seen ( geoip_client_action_t  action,
const tor_addr_t *  addr,
const char *  transport_name,
time_t  now 
)

Note that we've seen a client connect from the IP addr at time now. Ignored by all but bridges and directories if configured accordingly.

unresolved requests are stored at index 0.

Here is the call graph for this function:

◆ geoip_note_ns_response()

void geoip_note_ns_response ( geoip_ns_response_t  response)

Note the outcome response of a client's request for a v3 network status (for example, that we rejected the request for a particular reason).

◆ geoip_parse_entry()

STATIC int geoip_parse_entry ( const char *  line,
sa_family_t  family 
)

Add an entry to the GeoIP table indicated by family, parsing it from line. The format is as for geoip_load_file().

◆ geoip_remove_old_clients()

void geoip_remove_old_clients ( time_t  cutoff)

Forget about all clients that haven't connected since cutoff.

◆ geoip_reset_dirreq_stats()

void geoip_reset_dirreq_stats ( time_t  now)

Reset counters for dirreq stats.

Here is the caller graph for this function:

◆ geoip_reset_entry_stats()

void geoip_reset_entry_stats ( time_t  now)

Reset counters for entry stats.

Here is the caller graph for this function:

◆ geoip_start_dirreq()

void geoip_start_dirreq ( uint64_t  dirreq_id,
size_t  response_size,
dirreq_type_t  type 
)

Note that a directory request for a v3 network status, either direct or tunneled (see type), with unique ID dirreq_id and of size response_size has started.

◆ getinfo_helper_geoip()

int getinfo_helper_geoip ( control_connection_t *  control_conn,
const char *  question,
char **  answer,
const char **  errmsg 
)

Helper used to implement GETINFO ip-to-country/... controller command.

Here is the call graph for this function:

◆ HT_PROTOTYPE()

HT_PROTOTYPE ( clientmap  ,
clientmap_entry_t  ,
node  ,
clientmap_entry_hash  ,
clientmap_entries_eq   
)

Return the size of a client map entry.

◆ MOCK_IMPL() [1/4]

MOCK_IMPL ( country_t  ,
geoip_get_country  ,
(const char *country)   
)

Return the index of the country's entry in the GeoIP country list if it is a valid 2-letter country code, otherwise return -1.

Here is the call graph for this function:

◆ MOCK_IMPL() [2/4]

MOCK_IMPL ( int  ,
geoip_get_country_by_addr  ,
(const tor_addr_t *addr)   
)

Given an IP address, return a number representing the country to which that address belongs, -1 for "No geoip information available", or 0 for the 'unknown country'. The return value will always be less than geoip_get_n_countries(). To decode it, call geoip_get_country_name().

◆ MOCK_IMPL() [3/4]

MOCK_IMPL ( int  ,
geoip_get_n_countries  ,
(void)   
)

Return the number of countries recognized by the GeoIP country list.

◆ MOCK_IMPL() [4/4]

MOCK_IMPL ( int  ,
geoip_is_loaded  ,
(sa_family_t family)   
)

Return true iff we have loaded a GeoIP database.

◆ should_record_bridge_info()

int should_record_bridge_info ( const or_options_t *  options)

Return 1 if we should collect geoip stats on bridge users, and include them in our extrainfo descriptor. Else return 0.

Here is the caller graph for this function: