reth_cli_commands/db/
diff.rs

1use clap::Parser;
2use reth_db::{open_db_read_only, tables_to_generic, DatabaseEnv, Tables};
3use reth_db_api::{cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx};
4use reth_db_common::DbTool;
5use reth_node_builder::{NodeTypesWithDBAdapter, NodeTypesWithEngine};
6use reth_node_core::{
7    args::DatabaseArgs,
8    dirs::{DataDirPath, PlatformPath},
9};
10use std::{
11    collections::HashMap,
12    fmt::Debug,
13    fs::{self, File},
14    hash::Hash,
15    io::Write,
16    path::{Path, PathBuf},
17    sync::Arc,
18};
19use tracing::{info, warn};
20
21#[derive(Parser, Debug)]
22/// The arguments for the `reth db diff` command
23pub struct Command {
24    /// The path to the data dir for all reth files and subdirectories.
25    #[arg(long, verbatim_doc_comment)]
26    secondary_datadir: PlatformPath<DataDirPath>,
27
28    /// Arguments for the second database
29    #[command(flatten)]
30    second_db: DatabaseArgs,
31
32    /// The table name to diff. If not specified, all tables are diffed.
33    #[arg(long, verbatim_doc_comment)]
34    table: Option<Tables>,
35
36    /// The output directory for the diff report.
37    #[arg(long, verbatim_doc_comment)]
38    output: PlatformPath<PathBuf>,
39}
40
41impl Command {
42    /// Execute the `db diff` command.
43    ///
44    /// This first opens the `db/` folder from the secondary datadir, where the second database is
45    /// opened read-only.
46    ///
47    /// The tool will then iterate through all key-value pairs for the primary and secondary
48    /// databases. The value for each key will be compared with its corresponding value in the
49    /// other database. If the values are different, a discrepancy will be recorded in-memory. If
50    /// one key is present in one database but not the other, this will be recorded as an "extra
51    /// element" for that database.
52    ///
53    /// The discrepancies and extra elements, along with a brief summary of the diff results are
54    /// then written to a file in the output directory.
55    pub fn execute<T: NodeTypesWithEngine>(
56        self,
57        tool: &DbTool<NodeTypesWithDBAdapter<T, Arc<DatabaseEnv>>>,
58    ) -> eyre::Result<()> {
59        warn!("Make sure the node is not running when running `reth db diff`!");
60        // open second db
61        let second_db_path: PathBuf = self.secondary_datadir.join("db").into();
62        let second_db = open_db_read_only(&second_db_path, self.second_db.database_args())?;
63
64        let tables = match &self.table {
65            Some(table) => std::slice::from_ref(table),
66            None => Tables::ALL,
67        };
68
69        for table in tables {
70            let mut primary_tx = tool.provider_factory.db_ref().tx()?;
71            let mut secondary_tx = second_db.tx()?;
72
73            // disable long read transaction safety, since this will run for a while and it's
74            // expected that the node is not running
75            primary_tx.disable_long_read_transaction_safety();
76            secondary_tx.disable_long_read_transaction_safety();
77
78            let output_dir = self.output.clone();
79            tables_to_generic!(table, |Table| find_diffs::<Table>(
80                primary_tx,
81                secondary_tx,
82                output_dir
83            ))?;
84        }
85
86        Ok(())
87    }
88}
89
90/// Find diffs for a table, then analyzing the result
91fn find_diffs<T: Table>(
92    primary_tx: impl DbTx,
93    secondary_tx: impl DbTx,
94    output_dir: impl AsRef<Path>,
95) -> eyre::Result<()>
96where
97    T::Key: Hash,
98    T::Value: PartialEq,
99{
100    let table = T::NAME;
101
102    info!("Analyzing table {table}...");
103    let result = find_diffs_advanced::<T>(&primary_tx, &secondary_tx)?;
104    info!("Done analyzing table {table}!");
105
106    // Pretty info summary header: newline then header
107    info!("");
108    info!("Diff results for {table}:");
109
110    // create directory and open file
111    fs::create_dir_all(output_dir.as_ref())?;
112    let file_name = format!("{table}.txt");
113    let mut file = File::create(output_dir.as_ref().join(file_name.clone()))?;
114
115    // analyze the result and print some stats
116    let discrepancies = result.discrepancies.len();
117    let extra_elements = result.extra_elements.len();
118
119    // Make a pretty summary header for the table
120    writeln!(file, "Diff results for {table}")?;
121
122    if discrepancies > 0 {
123        // write to file
124        writeln!(file, "Found {discrepancies} discrepancies in table {table}")?;
125
126        // also print to info
127        info!("Found {discrepancies} discrepancies in table {table}");
128    } else {
129        // write to file
130        writeln!(file, "No discrepancies found in table {table}")?;
131
132        // also print to info
133        info!("No discrepancies found in table {table}");
134    }
135
136    if extra_elements > 0 {
137        // write to file
138        writeln!(file, "Found {extra_elements} extra elements in table {table}")?;
139
140        // also print to info
141        info!("Found {extra_elements} extra elements in table {table}");
142    } else {
143        writeln!(file, "No extra elements found in table {table}")?;
144
145        // also print to info
146        info!("No extra elements found in table {table}");
147    }
148
149    info!("Writing diff results for {table} to {file_name}...");
150
151    if discrepancies > 0 {
152        writeln!(file, "Discrepancies:")?;
153    }
154
155    for discrepancy in result.discrepancies.values() {
156        writeln!(file, "{discrepancy:?}")?;
157    }
158
159    if extra_elements > 0 {
160        writeln!(file, "Extra elements:")?;
161    }
162
163    for extra_element in result.extra_elements.values() {
164        writeln!(file, "{extra_element:?}")?;
165    }
166
167    let full_file_name = output_dir.as_ref().join(file_name);
168    info!("Done writing diff results for {table} to {}", full_file_name.display());
169    Ok(())
170}
171
172/// This diff algorithm is slightly different, it will walk _each_ table, cross-checking for the
173/// element in the other table.
174fn find_diffs_advanced<T: Table>(
175    primary_tx: &impl DbTx,
176    secondary_tx: &impl DbTx,
177) -> eyre::Result<TableDiffResult<T>>
178where
179    T::Value: PartialEq,
180    T::Key: Hash,
181{
182    // initialize the zipped walker
183    let mut primary_zip_cursor =
184        primary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
185    let primary_walker = primary_zip_cursor.walk(None)?;
186
187    let mut secondary_zip_cursor =
188        secondary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
189    let secondary_walker = secondary_zip_cursor.walk(None)?;
190    let zipped_cursor = primary_walker.zip(secondary_walker);
191
192    // initialize the cursors for seeking when we are cross checking elements
193    let mut primary_cursor =
194        primary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
195
196    let mut secondary_cursor =
197        secondary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
198
199    let mut result = TableDiffResult::<T>::default();
200
201    // this loop will walk both tables, cross-checking for the element in the other table.
202    // it basically just loops through both tables at the same time. if the keys are different, it
203    // will check each key in the other table. if the keys are the same, it will compare the
204    // values
205    for (primary_entry, secondary_entry) in zipped_cursor {
206        let (primary_key, primary_value) = primary_entry?;
207        let (secondary_key, secondary_value) = secondary_entry?;
208
209        if primary_key != secondary_key {
210            // if the keys are different, we need to check if the key is in the other table
211            let crossed_secondary =
212                secondary_cursor.seek_exact(primary_key.clone())?.map(|(_, value)| value);
213            result.try_push_discrepancy(
214                primary_key.clone(),
215                Some(primary_value),
216                crossed_secondary,
217            );
218
219            // now do the same for the primary table
220            let crossed_primary =
221                primary_cursor.seek_exact(secondary_key.clone())?.map(|(_, value)| value);
222            result.try_push_discrepancy(
223                secondary_key.clone(),
224                crossed_primary,
225                Some(secondary_value),
226            );
227        } else {
228            // the keys are the same, so we need to compare the values
229            result.try_push_discrepancy(primary_key, Some(primary_value), Some(secondary_value));
230        }
231    }
232
233    Ok(result)
234}
235
236/// Includes a table element between two databases with the same key, but different values
237#[derive(Debug)]
238struct TableDiffElement<T: Table> {
239    /// The key for the element
240    key: T::Key,
241
242    /// The element from the first table
243    #[allow(dead_code)]
244    first: T::Value,
245
246    /// The element from the second table
247    #[allow(dead_code)]
248    second: T::Value,
249}
250
251/// The diff result for an entire table. If the tables had the same number of elements, there will
252/// be no extra elements.
253struct TableDiffResult<T: Table>
254where
255    T::Key: Hash,
256{
257    /// All elements of the database that are different
258    discrepancies: HashMap<T::Key, TableDiffElement<T>>,
259
260    /// Any extra elements, and the table they are in
261    extra_elements: HashMap<T::Key, ExtraTableElement<T>>,
262}
263
264impl<T> Default for TableDiffResult<T>
265where
266    T: Table,
267    T::Key: Hash,
268{
269    fn default() -> Self {
270        Self { discrepancies: HashMap::default(), extra_elements: HashMap::default() }
271    }
272}
273
274impl<T: Table> TableDiffResult<T>
275where
276    T::Key: Hash,
277{
278    /// Push a diff result into the discrepancies set.
279    fn push_discrepancy(&mut self, discrepancy: TableDiffElement<T>) {
280        self.discrepancies.insert(discrepancy.key.clone(), discrepancy);
281    }
282
283    /// Push an extra element into the extra elements set.
284    fn push_extra_element(&mut self, element: ExtraTableElement<T>) {
285        self.extra_elements.insert(element.key().clone(), element);
286    }
287}
288
289impl<T> TableDiffResult<T>
290where
291    T: Table,
292    T::Key: Hash,
293    T::Value: PartialEq,
294{
295    /// Try to push a diff result into the discrepancy set, only pushing if the given elements are
296    /// different, and the discrepancy does not exist anywhere already.
297    fn try_push_discrepancy(
298        &mut self,
299        key: T::Key,
300        first: Option<T::Value>,
301        second: Option<T::Value>,
302    ) {
303        // do not bother comparing if the key is already in the discrepancies map
304        if self.discrepancies.contains_key(&key) {
305            return
306        }
307
308        // do not bother comparing if the key is already in the extra elements map
309        if self.extra_elements.contains_key(&key) {
310            return
311        }
312
313        match (first, second) {
314            (Some(first), Some(second)) => {
315                if first != second {
316                    self.push_discrepancy(TableDiffElement { key, first, second });
317                }
318            }
319            (Some(first), None) => {
320                self.push_extra_element(ExtraTableElement::First { key, value: first });
321            }
322            (None, Some(second)) => {
323                self.push_extra_element(ExtraTableElement::Second { key, value: second });
324            }
325            (None, None) => {}
326        }
327    }
328}
329
330/// A single extra element from a table
331#[derive(Debug)]
332enum ExtraTableElement<T: Table> {
333    /// The extra element that is in the first table
334    #[allow(dead_code)]
335    First { key: T::Key, value: T::Value },
336
337    /// The extra element that is in the second table
338    #[allow(dead_code)]
339    Second { key: T::Key, value: T::Value },
340}
341
342impl<T: Table> ExtraTableElement<T> {
343    /// Return the key for the extra element
344    const fn key(&self) -> &T::Key {
345        match self {
346            Self::First { key, .. } | Self::Second { key, .. } => key,
347        }
348    }
349}