reth_cli_commands/db/
diff.rs

1use clap::Parser;
2use reth_db::{open_db_read_only, tables_to_generic, DatabaseEnv};
3use reth_db_api::{
4    cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, Tables,
5};
6use reth_db_common::DbTool;
7use reth_node_builder::{NodeTypes, NodeTypesWithDBAdapter};
8use reth_node_core::{
9    args::DatabaseArgs,
10    dirs::{DataDirPath, PlatformPath},
11};
12use std::{
13    collections::BTreeMap,
14    fmt::Debug,
15    fs::{self, File},
16    hash::Hash,
17    io::Write,
18    path::{Path, PathBuf},
19    sync::Arc,
20};
21use tracing::{info, warn};
22
23#[derive(Parser, Debug)]
24/// The arguments for the `reth db diff` command
25pub struct Command {
26    /// The path to the data dir for all reth files and subdirectories.
27    #[arg(long, verbatim_doc_comment)]
28    secondary_datadir: PlatformPath<DataDirPath>,
29
30    /// Arguments for the second database
31    #[command(flatten)]
32    second_db: DatabaseArgs,
33
34    /// The table name to diff. If not specified, all tables are diffed.
35    #[arg(long, verbatim_doc_comment)]
36    table: Option<Tables>,
37
38    /// The output directory for the diff report.
39    #[arg(long, verbatim_doc_comment)]
40    output: PlatformPath<PathBuf>,
41}
42
43impl Command {
44    /// Execute the `db diff` command.
45    ///
46    /// This first opens the `db/` folder from the secondary datadir, where the second database is
47    /// opened read-only.
48    ///
49    /// The tool will then iterate through all key-value pairs for the primary and secondary
50    /// databases. The value for each key will be compared with its corresponding value in the
51    /// other database. If the values are different, a discrepancy will be recorded in-memory. If
52    /// one key is present in one database but not the other, this will be recorded as an "extra
53    /// element" for that database.
54    ///
55    /// The discrepancies and extra elements, along with a brief summary of the diff results are
56    /// then written to a file in the output directory.
57    pub fn execute<T: NodeTypes>(
58        self,
59        tool: &DbTool<NodeTypesWithDBAdapter<T, Arc<DatabaseEnv>>>,
60    ) -> eyre::Result<()> {
61        warn!("Make sure the node is not running when running `reth db diff`!");
62        // open second db
63        let second_db_path: PathBuf = self.secondary_datadir.join("db").into();
64        let second_db = open_db_read_only(&second_db_path, self.second_db.database_args())?;
65
66        let tables = match &self.table {
67            Some(table) => std::slice::from_ref(table),
68            None => Tables::ALL,
69        };
70
71        for table in tables {
72            let mut primary_tx = tool.provider_factory.db_ref().tx()?;
73            let mut secondary_tx = second_db.tx()?;
74
75            // disable long read transaction safety, since this will run for a while and it's
76            // expected that the node is not running
77            primary_tx.disable_long_read_transaction_safety();
78            secondary_tx.disable_long_read_transaction_safety();
79
80            let output_dir = self.output.clone();
81            tables_to_generic!(table, |Table| find_diffs::<Table>(
82                primary_tx,
83                secondary_tx,
84                output_dir
85            ))?;
86        }
87
88        Ok(())
89    }
90}
91
92/// Find diffs for a table, then analyzing the result
93fn find_diffs<T: Table>(
94    primary_tx: impl DbTx,
95    secondary_tx: impl DbTx,
96    output_dir: impl AsRef<Path>,
97) -> eyre::Result<()>
98where
99    T::Key: Hash,
100    T::Value: PartialEq,
101{
102    let table = T::NAME;
103
104    info!("Analyzing table {table}...");
105    let result = find_diffs_advanced::<T>(&primary_tx, &secondary_tx)?;
106    info!("Done analyzing table {table}!");
107
108    // Pretty info summary header: newline then header
109    info!("");
110    info!("Diff results for {table}:");
111
112    // analyze the result and print some stats
113    let discrepancies = result.discrepancies.len();
114    let extra_elements = result.extra_elements.len();
115
116    if discrepancies == 0 && extra_elements == 0 {
117        info!("No discrepancies or extra elements found in table {table}");
118        return Ok(());
119    }
120
121    // create directory and open file
122    fs::create_dir_all(output_dir.as_ref())?;
123    let file_name = format!("{table}.txt");
124    let mut file = File::create(output_dir.as_ref().join(file_name.clone()))?;
125
126    // Make a pretty summary header for the table
127    writeln!(file, "Diff results for {table}")?;
128
129    if discrepancies > 0 {
130        // write to file
131        writeln!(file, "Found {discrepancies} discrepancies in table {table}")?;
132
133        // also print to info
134        info!("Found {discrepancies} discrepancies in table {table}");
135    } else {
136        // write to file
137        writeln!(file, "No discrepancies found in table {table}")?;
138
139        // also print to info
140        info!("No discrepancies found in table {table}");
141    }
142
143    if extra_elements > 0 {
144        // write to file
145        writeln!(file, "Found {extra_elements} extra elements in table {table}")?;
146
147        // also print to info
148        info!("Found {extra_elements} extra elements in table {table}");
149    } else {
150        writeln!(file, "No extra elements found in table {table}")?;
151
152        // also print to info
153        info!("No extra elements found in table {table}");
154    }
155
156    info!("Writing diff results for {table} to {file_name}...");
157
158    if discrepancies > 0 {
159        writeln!(file, "Discrepancies:")?;
160    }
161
162    for discrepancy in result.discrepancies.values() {
163        writeln!(file, "{discrepancy:#?}")?;
164    }
165
166    if extra_elements > 0 {
167        writeln!(file, "Extra elements:")?;
168    }
169
170    for extra_element in result.extra_elements.values() {
171        writeln!(file, "{extra_element:#?}")?;
172    }
173
174    let full_file_name = output_dir.as_ref().join(file_name);
175    info!("Done writing diff results for {table} to {}", full_file_name.display());
176    Ok(())
177}
178
179/// This diff algorithm is slightly different, it will walk _each_ table, cross-checking for the
180/// element in the other table.
181fn find_diffs_advanced<T: Table>(
182    primary_tx: &impl DbTx,
183    secondary_tx: &impl DbTx,
184) -> eyre::Result<TableDiffResult<T>>
185where
186    T::Value: PartialEq,
187    T::Key: Hash,
188{
189    // initialize the zipped walker
190    let mut primary_zip_cursor =
191        primary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
192    let primary_walker = primary_zip_cursor.walk(None)?;
193
194    let mut secondary_zip_cursor =
195        secondary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
196    let secondary_walker = secondary_zip_cursor.walk(None)?;
197    let zipped_cursor = primary_walker.zip(secondary_walker);
198
199    // initialize the cursors for seeking when we are cross checking elements
200    let mut primary_cursor =
201        primary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
202
203    let mut secondary_cursor =
204        secondary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
205
206    let mut result = TableDiffResult::<T>::default();
207
208    // this loop will walk both tables, cross-checking for the element in the other table.
209    // it basically just loops through both tables at the same time. if the keys are different, it
210    // will check each key in the other table. if the keys are the same, it will compare the
211    // values
212    for (primary_entry, secondary_entry) in zipped_cursor {
213        let (primary_key, primary_value) = primary_entry?;
214        let (secondary_key, secondary_value) = secondary_entry?;
215
216        if primary_key != secondary_key {
217            // if the keys are different, we need to check if the key is in the other table
218            let crossed_secondary =
219                secondary_cursor.seek_exact(primary_key.clone())?.map(|(_, value)| value);
220            result.try_push_discrepancy(
221                primary_key.clone(),
222                Some(primary_value),
223                crossed_secondary,
224            );
225
226            // now do the same for the primary table
227            let crossed_primary =
228                primary_cursor.seek_exact(secondary_key.clone())?.map(|(_, value)| value);
229            result.try_push_discrepancy(
230                secondary_key.clone(),
231                crossed_primary,
232                Some(secondary_value),
233            );
234        } else {
235            // the keys are the same, so we need to compare the values
236            result.try_push_discrepancy(primary_key, Some(primary_value), Some(secondary_value));
237        }
238    }
239
240    Ok(result)
241}
242
243/// Includes a table element between two databases with the same key, but different values
244#[derive(Debug)]
245struct TableDiffElement<T: Table> {
246    /// The key for the element
247    key: T::Key,
248
249    /// The element from the first table
250    #[expect(dead_code)]
251    first: T::Value,
252
253    /// The element from the second table
254    #[expect(dead_code)]
255    second: T::Value,
256}
257
258/// The diff result for an entire table. If the tables had the same number of elements, there will
259/// be no extra elements.
260struct TableDiffResult<T: Table>
261where
262    T::Key: Hash,
263{
264    /// All elements of the database that are different
265    discrepancies: BTreeMap<T::Key, TableDiffElement<T>>,
266
267    /// Any extra elements, and the table they are in
268    extra_elements: BTreeMap<T::Key, ExtraTableElement<T>>,
269}
270
271impl<T> Default for TableDiffResult<T>
272where
273    T: Table,
274    T::Key: Hash,
275{
276    fn default() -> Self {
277        Self { discrepancies: BTreeMap::default(), extra_elements: BTreeMap::default() }
278    }
279}
280
281impl<T: Table> TableDiffResult<T>
282where
283    T::Key: Hash,
284{
285    /// Push a diff result into the discrepancies set.
286    fn push_discrepancy(&mut self, discrepancy: TableDiffElement<T>) {
287        self.discrepancies.insert(discrepancy.key.clone(), discrepancy);
288    }
289
290    /// Push an extra element into the extra elements set.
291    fn push_extra_element(&mut self, element: ExtraTableElement<T>) {
292        self.extra_elements.insert(element.key().clone(), element);
293    }
294}
295
296impl<T> TableDiffResult<T>
297where
298    T: Table,
299    T::Key: Hash,
300    T::Value: PartialEq,
301{
302    /// Try to push a diff result into the discrepancy set, only pushing if the given elements are
303    /// different, and the discrepancy does not exist anywhere already.
304    fn try_push_discrepancy(
305        &mut self,
306        key: T::Key,
307        first: Option<T::Value>,
308        second: Option<T::Value>,
309    ) {
310        // do not bother comparing if the key is already in the discrepancies map
311        if self.discrepancies.contains_key(&key) {
312            return
313        }
314
315        // do not bother comparing if the key is already in the extra elements map
316        if self.extra_elements.contains_key(&key) {
317            return
318        }
319
320        match (first, second) {
321            (Some(first), Some(second)) => {
322                if first != second {
323                    self.push_discrepancy(TableDiffElement { key, first, second });
324                }
325            }
326            (Some(first), None) => {
327                self.push_extra_element(ExtraTableElement::First { key, value: first });
328            }
329            (None, Some(second)) => {
330                self.push_extra_element(ExtraTableElement::Second { key, value: second });
331            }
332            (None, None) => {}
333        }
334    }
335}
336
337/// A single extra element from a table
338#[derive(Debug)]
339enum ExtraTableElement<T: Table> {
340    /// The extra element that is in the first table
341    #[expect(dead_code)]
342    First { key: T::Key, value: T::Value },
343
344    /// The extra element that is in the second table
345    #[expect(dead_code)]
346    Second { key: T::Key, value: T::Value },
347}
348
349impl<T: Table> ExtraTableElement<T> {
350    /// Return the key for the extra element
351    const fn key(&self) -> &T::Key {
352        match self {
353            Self::First { key, .. } | Self::Second { key, .. } => key,
354        }
355    }
356}